Skip to content

feat: collect statistics #241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Contributing

## Release

```sh
npm run release
```
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@algolia/json-stream-analyzer",
"version": "0.2.17",
"version": "1.0.0-beta.3",
"main": "index.js",
"repository": "git@github.com:algolia/json-stream-analyzer.git",
"author": "Jonathan Montane <[email protected]>",
Expand Down Expand Up @@ -32,7 +32,7 @@
"pre-release": "yarn test && yarn build && cp package.json dist/ && npm publish dist/ --dry-run --access public ",
"re-link": "cd dist/ && (yarn unlink || echo 'no package to unlink') && yarn link && cd .. && yarn link @algolia/json-stream-analyzer",
"pre-release-test": "yarn pre-release && yarn re-link && jest --config jest.pre-release.config.js",
"release": "(npm whoami || (echo 'this command must be run from npm and not yarn' && exit 1)) && yarn pre-release-test && yarn version && cp package.json dist/ && cp README.md dist/ && cp LICENSE dist/ && npm publish dist/ --access public && git push origin master"
"release": "(npm whoami || (echo 'this command must be run from npm and not yarn' && exit 1)) && yarn pre-release-test && yarn version && cp package.json dist/ && cp README.md dist/ && cp LICENSE dist/ && npm publish dist/ --access public && git push origin"
},
"engines": {
"node": ">=14.0.0"
Expand Down
115 changes: 115 additions & 0 deletions src/convert.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import convertToSchema from './convert';
import type { ModelOptions } from './interfaces';
import type { ObjectType } from './types';

describe('convertToSchema', () => {
it('transforms Array into ArrayType', () => {
Expand Down Expand Up @@ -43,3 +45,116 @@ describe('convertToSchema', () => {
expect(converted.counter).toBe(1);
});
});

// Checks that enabling `collectStatistics` makes convertToSchema attach a
// `stats` entry to the array and boolean nodes of the resulting schema.
describe('statistics', () => {
it('should collect statistics', () => {
// Enable both statistics flags declared on ModelOptions.
const options: ModelOptions = {
collectStatistics: { array: true, boolean: true },
};
// The second argument (tag) is left undefined; only the options matter here.
const converted = convertToSchema(
{ bool: true, arr: ['foo', 'bar'] },
undefined,
options
) as ObjectType;

expect(converted.type).toBe('Object');

// Round-trip through JSON so the snapshot compares plain data rather than
// schema class instances.
const simplifiedSchema = JSON.parse(JSON.stringify(converted.schema));
// Expected: the two-element array is recorded as lengths { 2: 1 } and the
// `true` boolean as trueVal 1.
expect(simplifiedSchema).toMatchInlineSnapshot(`
Object {
"arr": Object {
"counter": 1,
"stats": Object {
"lengths": Object {
"2": 1,
},
},
"type": "Array",
"types": Object {
"String": Object {
"counter": 1,
"type": "String",
},
},
},
"bool": Object {
"counter": 1,
"stats": Object {
"trueVal": 1,
},
"type": "Boolean",
},
}
`);
});
});

// Checks that a `modifier` supplied through ModelOptions can rewrite a node
// before it is converted into a schema.
describe('modifiers', () => {
it('should allow modification of object', () => {
const options: ModelOptions = {
modifier: (path, content) => {
// Only rewrite the node located exactly at path ['foo']; every other
// node is returned untouched.
// NOTE(review): `typeof null` is also 'object', so a null at ['foo'] would
// reach Object.entries below — not exercised by this fixture.
if (
path.length !== 1 ||
path[0] !== 'foo' ||
typeof content !== 'object'
) {
return content;
}

// Flatten one level: the properties of object-valued entries are merged
// into the copy (dropping the wrapping key), other entries are kept as-is.
let copy: Record<string, any> = {};
for (const [key, val] of Object.entries(content)) {
if (typeof val === 'object') {
copy = { ...copy, ...val };
} else {
copy[key] = val;
}
}
return copy;
},
};
// No tag is supplied; `foo` holds two nested objects plus one boolean.
const converted = convertToSchema(
{
foo: {
shouldRemove: {
foo: 2,
},
alsoRemove: {
bar: 1,
},
notRemoved: true,
},
},
undefined,
options
) as ObjectType;

expect(converted.type).toBe('Object');

// Round-trip through JSON so the snapshot compares plain data rather than
// schema class instances.
const simplifiedSchema = JSON.parse(JSON.stringify(converted.schema));
// Expected: `shouldRemove` / `alsoRemove` are gone and their `foo` / `bar`
// numbers sit directly under `foo`, next to `notRemoved`.
// NOTE(review): collectStatistics is not enabled here, so the `trueVal: 0`
// in the snapshot is presumably BooleanType's default stats — confirm.
expect(simplifiedSchema).toMatchInlineSnapshot(`
Object {
"foo": Object {
"counter": 1,
"schema": Object {
"bar": Object {
"counter": 1,
"type": "Number",
},
"foo": Object {
"counter": 1,
"type": "Number",
},
"notRemoved": Object {
"counter": 1,
"stats": Object {
"trueVal": 0,
},
"type": "Boolean",
},
},
"type": "Object",
},
}
`);
});
});
41 changes: 35 additions & 6 deletions src/convert.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { SchemaType, SchemaObject } from './interfaces';
import type { SchemaType, SchemaObject, ModelOptions } from './interfaces';
import {
ArrayType,
BooleanType,
Expand All @@ -9,13 +9,29 @@ import {
StringType,
} from './types';

const convertToSchema = (content: any, tag?: any): SchemaType => {
const convertToSchema = (
content: any,
tag?: any,
options?: ModelOptions,
path: string[] = []
): SchemaType => {
if (options?.modifier) {
// eslint-disable-next-line no-param-reassign
content = options.modifier(path, content);
}

if (typeof content === 'number') {
return new NumberType({ counter: 1, tag });
}

if (typeof content === 'boolean') {
return new BooleanType({ counter: 1, tag });
return new BooleanType({
counter: 1,
tag,
stats: options?.collectStatistics?.boolean
? { trueVal: content === true ? 1 : 0 }
: undefined,
});
}

if (typeof content === 'string') {
Expand All @@ -33,7 +49,7 @@ const convertToSchema = (content: any, tag?: any): SchemaType => {
types = { Missing: new MissingType({ counter: 1, tag }) };
} else {
types = content.reduce((partial, item) => {
const schema = convertToSchema(item, tag);
const schema = convertToSchema(item, tag, options, [...path]);
const update: SchemaObject = {};
if (partial[schema.type]) {
update[schema.type] = partial[schema.type].combine(schema, {
Expand All @@ -46,12 +62,25 @@ const convertToSchema = (content: any, tag?: any): SchemaType => {
return { ...partial, ...update };
}, {});
}
return new ArrayType({ counter: 1, tag }, types);

return new ArrayType(
{
counter: 1,
tag,
stats: options?.collectStatistics?.array
? { lengths: { [content.length]: 1 } }
: undefined,
},
types
);
}

const schema: SchemaObject = Object.entries(content).reduce(
(schemas: SchemaObject, [key, subContent]) => {
return { ...schemas, [key]: convertToSchema(subContent, tag) };
return {
...schemas,
[key]: convertToSchema(subContent, tag, options, [...path, key]),
};
},
{}
);
Expand Down
11 changes: 11 additions & 0 deletions src/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,14 @@ export interface Model {
addToModel: (record: any) => void;
traverseSchema: (path: string[]) => { schema?: SchemaType; path: string[] };
}

/**
 * Required arguments for constructing a model.
 */
export interface ModelArgs {
/**
 * Computes the tag for a record. The returned value is handed to schema
 * conversion together with the record.
 */
tag: (record: any) => any;
}
/**
 * Optional settings forwarded to schema conversion.
 */
export interface ModelOptions {
/**
 * Opt-in statistics. Each flag enables collection for one kind of node; when
 * a flag is unset no explicit stats are passed for that node.
 */
collectStatistics?: {
/** When truthy, each converted array records its length as `lengths: { [length]: 1 }`. */
array?: boolean;
/** When truthy, each converted boolean records `trueVal` as 1 for `true` and 0 otherwise. */
boolean?: boolean;
};
/**
 * Hook called on a node before it is converted; the value it returns is
 * converted in place of the original node. `path` holds the object keys
 * leading to the node (array items receive the same path as their array).
 */
modifier?: (path: string[], node: any) => any;
}
23 changes: 16 additions & 7 deletions src/models/SimpleTag.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
import convertToSchema from '../convert';
import type { SchemaType, Diagnostic, Model } from '../interfaces';
import type {
SchemaType,
Diagnostic,
Model,
ModelOptions,
ModelArgs,
} from '../interfaces';

export class SimpleTagModel implements Model {
tag: (record: any) => any;
options: ModelOptions;
schema?: SchemaType;

constructor({ tag }: { tag: (record: any) => any }) {
this.tag = tag;
constructor(options: ModelArgs & ModelOptions) {
this.options = options;
this.tag = options.tag;
}

tag: ModelArgs['tag'] = () => null;

convert = (record: any): SchemaType => {
const tag = this.tag(record);
return convertToSchema(record, tag);
return convertToSchema(record, tag, this.options);
};

combineTag = (firstTag: any): any => {
Expand All @@ -32,7 +41,7 @@ export class SimpleTagModel implements Model {

diagnoseRecord = (record: any): Diagnostic[] => {
const tag = this.tag(record);
const recordSchema = convertToSchema(record, tag);
const recordSchema = convertToSchema(record, tag, this.options);

let combined;
if (this.schema) {
Expand All @@ -55,7 +64,7 @@ export class SimpleTagModel implements Model {
}
};

traverseSchema = (path: string[]) => {
traverseSchema = (path: string[]): ReturnType<SchemaType['traverse']> => {
if (!this.schema) {
return { path, schema: this.schema };
}
Expand Down
28 changes: 28 additions & 0 deletions src/types/ArrayType.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ describe('ArrayType simple test case', () => {
expect(combined.counter).toBe(2);
});

it('combines lengths', () => {
  // One array of length 1 and two arrays of length 2, each observed once.
  const lengthOne = new ArrayType({ stats: { lengths: { 1: 1 } } });
  const lengthTwoA = new ArrayType({ stats: { lengths: { 2: 1 } } });
  const lengthTwoB = new ArrayType({ stats: { lengths: { 2: 1 } } });

  // Fold the remaining schemas into the first one, left to right.
  const merged = [lengthTwoA, lengthTwoB].reduce(
    (acc, next) => acc.combine(next) as ArrayType,
    lengthOne
  );

  expect(merged.type).toBe('Array');
  expect(merged.counter).toBe(3);
  // Per-length occurrence counts are summed across the combined schemas.
  expect(merged.stats).toStrictEqual({ lengths: { 1: 1, 2: 2 } });
});

it('combine does not mutate inputs', () => {
const b1 = new ArrayType();
const b2 = new ArrayType();
Expand Down Expand Up @@ -128,12 +141,27 @@ describe('ArrayType simple test case', () => {

describe('Simple Array Type test case', () => {
it('defines correct schema for string arrays', () => {
  // Array-length statistics are opt-in, so enable them explicitly.
  const withArrayStats = { collectStatistics: { array: true } };
  const input = ['someText', 'someText'];

  const arraySchema = convertToSchema(
    input,
    undefined,
    withArrayStats
  ) as ArrayType;

  expect(arraySchema.type).toBe('Array');
  expect(arraySchema.types.String).toBeDefined();
  // Both items are strings, so they collapse into a single String entry.
  expect(arraySchema.types.String.counter).toBe(1);
  expect(arraySchema.counter).toBe(1);
  // One array of length 2 was seen.
  expect(arraySchema.stats).toStrictEqual({ lengths: { 2: 1 } });
});

it('ignore array length by default', () => {
  // No options are passed, so no array-length statistics are requested.
  const input = ['someText', 'someText'];
  const arraySchema = convertToSchema(input) as ArrayType;

  expect(arraySchema.type).toBe('Array');
  expect(arraySchema.types.String).toBeDefined();
  expect(arraySchema.types.String.counter).toBe(1);
  expect(arraySchema.counter).toBe(1);
  // The stats object still exists but records no lengths.
  expect(arraySchema.stats).toStrictEqual({ lengths: {} });
});

it('defines correct schema for boolean arrays', () => {
Expand Down
Loading