Skip to content

Commit bc56a97

Browse files
committed
feat: optional field count threshold
1 parent 05c0df2 commit bc56a97

File tree

2 files changed

+80
-31
lines changed

2 files changed

+80
-31
lines changed

src/schema-analyzer.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ type AllSchemaParseOptions = {
167167
storeValues: boolean;
168168
signal?: AbortSignal;
169169
sampleLengthLimit: number;
170+
distinctFieldsAbortThreshold?: number;
170171
};
171172
export type SchemaParseOptions = Partial<AllSchemaParseOptions>;
172173

@@ -469,6 +470,7 @@ export class SchemaAnalyzer {
469470
semanticTypes: SemanticTypeMap;
470471
options: AllSchemaParseOptions;
471472
documentsAnalyzed = 0;
473+
fieldsCount = 0;
472474
schemaAnalysisRoot: SchemaAnalysisRoot = {
473475
fields: Object.create(null),
474476
count: 0
@@ -508,6 +510,14 @@ export class SchemaAnalyzer {
508510
}
509511
}
510512

513+
/**
 * Counts one more field towards the optional `distinctFieldsAbortThreshold`.
 *
 * When the threshold option is unset (or otherwise falsy, e.g. `0`) this is a
 * no-op: `fieldsCount` is not incremented and nothing is thrown.
 *
 * @throws {Error} when `fieldsCount`, after being incremented, is strictly
 *   greater than `options.distinctFieldsAbortThreshold`.
 */
increaseFieldCount() {
514+
if (!this.options.distinctFieldsAbortThreshold) return;
515+
this.fieldsCount++;
516+
if (this.fieldsCount > this.options.distinctFieldsAbortThreshold) {
517+
throw new Error(`Schema analysis aborted: Fields count above ${this.options.distinctFieldsAbortThreshold}`);
518+
}
519+
}
520+
511521
getSemanticType(value: BSONValue, path: string[]) {
512522
// Pass value to semantic type detectors, return first match or undefined.
513523
const returnValue = Object.entries(this.semanticTypes)
@@ -580,6 +590,7 @@ export class SchemaAnalyzer {
580590
count: 0,
581591
types: Object.create(null)
582592
};
593+
this.increaseFieldCount();
583594
}
584595
const field = schema[fieldName];
585596

test/bloated.test.ts

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,78 @@ function generateRandomString(length: number) {
1414
}
1515

1616
describe('bloated documents', function() {
17-
it('really long string is cropped', async function() {
18-
const documents = [{
19-
str: generateRandomString(20000)
20-
}];
21-
const schema = await getSchema(documents);
22-
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
23-
assert.ok(stringLength <= 10000);
24-
});
17+
describe('sizeable sample values', function() {
18+
it('really long string is cropped', async function() {
19+
const documents = [{
20+
str: generateRandomString(20000)
21+
}];
22+
const schema = await getSchema(documents);
23+
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
24+
assert.ok(stringLength <= 10000);
25+
});
2526

26-
it('really long code is cropped', async function() {
27-
const documents = [{
28-
code: new Code(generateRandomString(20000))
29-
}];
30-
const schema = await getSchema(documents);
31-
const codeLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Code).code.length;
32-
assert.ok(codeLength <= 10000);
33-
});
27+
it('really long code is cropped', async function() {
28+
const documents = [{
29+
code: new Code(generateRandomString(20000))
30+
}];
31+
const schema = await getSchema(documents);
32+
const codeLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Code).code.length;
33+
assert.ok(codeLength <= 10000);
34+
});
3435

35-
it('really long binary is cropped', async function() {
36-
const documents = [{
37-
binData: new Binary(Buffer.from(generateRandomString(20000)), 2)
38-
}];
39-
const schema = await getSchema(documents);
40-
const binary = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Binary);
41-
assert.ok(binary.length() <= 10000);
42-
assert.strictEqual(binary.sub_type, 2);
36+
it('really long binary is cropped', async function() {
37+
const documents = [{
38+
binData: new Binary(Buffer.from(generateRandomString(20000)), 2)
39+
}];
40+
const schema = await getSchema(documents);
41+
const binary = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Binary);
42+
assert.ok(binary.length() <= 10000);
43+
assert.strictEqual(binary.sub_type, 2);
44+
});
45+
46+
it('the limit is configurable', async function() {
47+
const documents = [{
48+
str: generateRandomString(20000)
49+
}];
50+
const schema = await getSchema(documents, { sampleLengthLimit: 5 });
51+
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
52+
assert.ok(stringLength === 5);
53+
});
4354
});
4455

45-
it('the limit is configurable', async function() {
46-
const documents = [{
47-
str: generateRandomString(20000)
48-
}];
49-
const schema = await getSchema(documents, { sampleLengthLimit: 5 });
50-
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
51-
assert.ok(stringLength === 5);
56+
// Suite for the `distinctFieldsAbortThreshold` option: analysis must abort with
// a descriptive error once more distinct fields than the threshold are seen.
// NOTE: this must stay a plain `describe` — a committed `.only` makes the test
// runner skip every other suite.
describe('high complexity', function() {
57+
it('aborts after reaching the given limit', async function() {
58+
// Five distinct top-level fields against a threshold of four.
const documents = [{
59+
field1: 'abc',
60+
field2: 'bca',
61+
field3: 'cba',
62+
field4: 'cab',
63+
field5: 'bac'
64+
}];
65+
try {
66+
await getSchema(documents, { distinctFieldsAbortThreshold: 4 });
67+
// Reaching this line means no abort happened; the failure it raises is
// caught below and surfaces as a message mismatch.
assert.fail('Analysis did not throw');
68+
} catch (error) {
69+
assert.strictEqual((error as Error).message, 'Schema analysis aborted: Fields count above 4');
70+
}
71+
});
72+
73+
it('aborts after reaching the given limit - nested fields', async function() {
74+
// Five distinct fields again, but spread over three nesting levels.
const documents = [{
75+
field1: {
76+
field2: {
77+
field3: 'abc',
78+
field4: 'bca'
79+
},
80+
field5: 'cab'
81+
}
82+
}];
83+
try {
84+
await getSchema(documents, { distinctFieldsAbortThreshold: 4 });
85+
assert.fail('Analysis did not throw');
86+
} catch (error) {
87+
assert.strictEqual((error as Error).message, 'Schema analysis aborted: Fields count above 4');
88+
}
89+
});
5290
});
5391
});

0 commit comments

Comments
 (0)