diff --git a/src/index.ts b/src/index.ts
index 1611280..c9301a1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -37,7 +37,7 @@ async function analyzeDocuments(
   */
 async function parseSchema(
   source: AnyIterable,
-  options?: SchemaParseOptions
+  options?: Partial<SchemaParseOptions>
 ): Promise<Schema> {
   return (await getCompletedSchemaAnalyzer(source, options)).getResult();
 }
diff --git a/src/schema-analyzer.ts b/src/schema-analyzer.ts
index 50e5370..d06a1fc 100644
--- a/src/schema-analyzer.ts
+++ b/src/schema-analyzer.ts
@@ -162,10 +162,18 @@ type SemanticTypeMap = {
   [typeName: string]: SemanticTypeFunction | boolean;
 };
 
-export type SchemaParseOptions = {
-  semanticTypes?: boolean | SemanticTypeMap;
-  storeValues?: boolean;
+type AllSchemaParseOptions = {
+  semanticTypes: boolean | SemanticTypeMap;
+  storeValues: boolean;
   signal?: AbortSignal;
+  storedValuesLengthLimit: number;
+};
+export type SchemaParseOptions = Partial<AllSchemaParseOptions>;
+
+const defaultSchemaParseOptions: AllSchemaParseOptions = {
+  semanticTypes: false,
+  storeValues: true,
+  storedValuesLengthLimit: 10000
 };
 
 /**
@@ -331,25 +339,25 @@ function simplifiedSchema(fields: SchemaAnalysisFieldsMap): SimplifiedSchema {
 
 function cropString(value: string, limit: number) {
   if (limit < 1) return '';
-  return value.charCodeAt(limit - 1) === value.codePointAt(10000 - 1)
+  return value.charCodeAt(limit - 1) === value.codePointAt(limit - 1)
     ? value.slice(0, limit)
     : value.slice(0, limit - 1);
 }
 
-function getCappedValue(bsonType: SchemaBSONType, value: BSONValue) {
+function getCappedValue(bsonType: SchemaBSONType, value: BSONValue, limit: number) {
   if (bsonType === 'String') {
-    return cropString(value as string, 10000);
+    return cropString(value as string, limit);
   }
   if (bsonType === 'Binary') {
     value = value as Binary;
-    return value.buffer.length > 10000
-      ? new Binary(value.buffer.slice(0, 10000), value.sub_type)
+    return value.buffer.length > limit
+      ? new Binary(value.buffer.slice(0, limit), value.sub_type)
       : value;
   }
   if (bsonType === 'Code') {
     value = value as Code;
-    return (value.code.length >= 10000)
-      ? new Code(cropString(value.code, 10000), value.scope)
+    return (value.code.length >= limit)
+      ? new Code(cropString(value.code, limit), value.scope)
       : value;
   }
   return value;
@@ -459,7 +467,7 @@ function finalizeSchema(schemaAnalysis: SchemaAnalysisRoot): SchemaField[] {
 
 export class SchemaAnalyzer {
   semanticTypes: SemanticTypeMap;
-  options: SchemaParseOptions;
+  options: AllSchemaParseOptions;
   documentsAnalyzed = 0;
   schemaAnalysisRoot: SchemaAnalysisRoot = {
     fields: Object.create(null),
@@ -474,7 +482,7 @@ export class SchemaAnalyzer {
 
   constructor(options?: SchemaParseOptions) {
     // Set default options.
-    this.options = { semanticTypes: false, storeValues: true, ...options };
+    this.options = { ...defaultSchemaParseOptions, ...options };
 
     this.semanticTypes = {
       ...semanticTypes
@@ -555,7 +563,7 @@ export class SchemaAnalyzer {
       }
 
       type.values.pushSome(
-        getCappedValue(type.bsonType, value)
+        getCappedValue(type.bsonType, value, this.options.storedValuesLengthLimit)
       );
     }
   };
diff --git a/test/bloated.test.ts b/test/bloated.test.ts
index 17adfeb..0793ba4 100644
--- a/test/bloated.test.ts
+++ b/test/bloated.test.ts
@@ -41,4 +41,13 @@ describe('bloated documents', function() {
     assert.ok(binary.length() <= 10000);
     assert.strictEqual(binary.sub_type, 2);
   });
+
+  it('the limit is configurable', async function() {
+    const documents = [{
+      str: generateRandomString(20000)
+    }];
+    const schema = await getSchema(documents, { storedValuesLengthLimit: 5 });
+    const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
+    assert.ok(stringLength === 5);
+  });
 });