Skip to content

Commit 8244e00

Browse files
authored
fix: crop bloated code and binary COMPASS-8902 (#221)
1 parent 9372e5a commit 8244e00

File tree

2 files changed

+70
-7
lines changed

2 files changed

+70
-7
lines changed

src/schema-analyzer.ts

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,6 @@ function getBSONType(value: any): SchemaBSONType {
209209
const bsonType = value?._bsontype
210210
? value._bsontype
211211
: Object.prototype.toString.call(value).replace(/^\[object (\w+)\]$/, '$1');
212-
213212
if (bsonType === 'Object') {
214213
// In the resulting schema we rename `Object` to `Document`.
215214
return 'Document';
@@ -324,10 +323,30 @@ function simplifiedSchema(fields: SchemaAnalysisFieldsMap): SimplifiedSchema {
324323
return finalizeDocumentFieldSchema(fields);
325324
}
326325

327-
function cropStringAt10kCharacters(value: string) {
328-
return value.charCodeAt(10000 - 1) === value.codePointAt(10000 - 1)
329-
? value.slice(0, 10000)
330-
: value.slice(0, 10000 - 1);
326+
/**
 * Crops `value` to at most `limit` UTF-16 code units without cutting a
 * surrogate pair in half.
 *
 * @param value - The string to crop.
 * @param limit - Maximum number of UTF-16 code units to keep.
 * @returns An empty string when `limit` is below 1; `value` unchanged when it
 *   is shorter than `limit`; otherwise the first `limit` code units, or the
 *   first `limit - 1` when the code unit at index `limit - 1` opens a
 *   surrogate pair that the cut would otherwise split.
 */
function cropString(value: string, limit: number): string {
  if (limit < 1) return '';
  // `charCodeAt` yields a single code unit while `codePointAt` yields the whole
  // code point starting at that index. They differ only when index `limit - 1`
  // holds a high surrogate followed by its low surrogate. Both lookups must
  // use the same index (`limit - 1`) for that comparison to be meaningful.
  // When `value` is shorter than `limit` the lookups give NaN and undefined,
  // so the second branch runs and `slice` returns the whole string.
  return value.charCodeAt(limit - 1) === value.codePointAt(limit - 1)
    ? value.slice(0, limit)
    : value.slice(0, limit - 1);
}
332+
333+
/**
 * Produces the value to store as a sample, capping the payload of the types
 * handled below at 10000 units.
 *
 * - `'String'`: the value is passed through `cropString` with a limit of 10000.
 * - `'Binary'`: when the buffer holds more than 10000 bytes, a new `Binary`
 *   is built from the first 10000 bytes with the same `sub_type`.
 * - `'Code'`: when the code text has 10000 or more characters, a new `Code`
 *   is built from the cropped text with the same `scope`.
 * - Any other type: the value is returned untouched.
 *
 * @param bsonType - The BSON type name that selects the capping rule.
 * @param value - The value to cap.
 * @returns The original value, or a cropped replacement of the same kind.
 */
function getCappedValue(bsonType: SchemaBSONType, value: BSONValue) {
  switch (bsonType) {
    case 'String':
      return cropString(value as string, 10000);

    case 'Binary': {
      const binary = value as Binary;
      if (binary.buffer.length <= 10000) {
        return binary;
      }
      return new Binary(binary.buffer.slice(0, 10000), binary.sub_type);
    }

    case 'Code': {
      const code = value as Code;
      if (code.code.length < 10000) {
        return code;
      }
      return new Code(cropString(code.code, 10000), code.scope);
    }

    default:
      return value;
  }
}
332351

333352
function computeHasDuplicatesForType(type: SchemaAnalysisType, unique?: number) {
@@ -525,12 +544,12 @@ export class SchemaAnalyzer {
525544
} else if (this.options.storeValues && !isNullType(type)) {
526545
// When the `storeValues` option is enabled, store some example values.
527546
if (!type.values) {
528-
type.values = bsonType === 'String'
547+
type.values = ['String', 'Binary', 'Code'].includes(bsonType)
529548
? Reservoir(100) : Reservoir(10000);
530549
}
531550

532551
type.values.pushSome(
533-
type.name === 'String' ? cropStringAt10kCharacters(value as string) : value
552+
getCappedValue(type.bsonType, value)
534553
);
535554
}
536555
};

test/bloated.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import assert from 'assert';
2+
import { Binary, Code } from 'bson';
3+
4+
import type { PrimitiveSchemaType } from '../src/schema-analyzer';
5+
import getSchema from '../src';
6+
7+
/**
 * Builds a random string for test fixtures.
 *
 * @param length - Number of characters to generate.
 * @returns A string of `length` characters, each picked with `Math.random()`
 *   from the ASCII letters and digits.
 */
function generateRandomString(length: number) {
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  const picked: string[] = [];
  while (picked.length < length) {
    const index = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet.charAt(index));
  }
  return picked.join('');
}
15+
16+
// Checks that oversized sample values (strings, code, binary data) are
// cropped to at most 10000 units before they are stored in the schema.
describe('bloated documents', function() {
  it('really long string is cropped', async function() {
    const schema = await getSchema([{ str: generateRandomString(20000) }]);

    const storedType = schema.fields[0].types[0] as PrimitiveSchemaType;
    const storedString = storedType.values[0] as string;
    assert.ok(storedString.length <= 10000);
  });

  it('really long code is cropped', async function() {
    const schema = await getSchema([
      { code: new Code(generateRandomString(20000)) }
    ]);

    const storedType = schema.fields[0].types[0] as PrimitiveSchemaType;
    const storedCode = storedType.values[0] as Code;
    assert.ok(storedCode.code.length <= 10000);
  });

  it('really long binary is cropped', async function() {
    const payload = Buffer.from(generateRandomString(20000));
    const schema = await getSchema([{ binData: new Binary(payload, 2) }]);

    const storedType = schema.fields[0].types[0] as PrimitiveSchemaType;
    const storedBinary = storedType.values[0] as Binary;
    assert.ok(storedBinary.length() <= 10000);
    // Cropping must keep the original binary subtype.
    assert.strictEqual(storedBinary.sub_type, 2);
  });
});

0 commit comments

Comments
 (0)