@@ -209,7 +209,6 @@ function getBSONType(value: any): SchemaBSONType {
209209 const bsonType = value ?. _bsontype
210210 ? value . _bsontype
211211 : Object . prototype . toString . call ( value ) . replace ( / ^ \[ o b j e c t ( \w + ) \] $ / , '$1' ) ;
212-
213212 if ( bsonType === 'Object' ) {
214213 // In the resulting schema we rename `Object` to `Document`.
215214 return 'Document' ;
@@ -324,10 +323,29 @@ function simplifiedSchema(fields: SchemaAnalysisFieldsMap): SimplifiedSchema {
324323 return finalizeDocumentFieldSchema ( fields ) ;
325324}
326325
327- function cropStringAt10kCharacters ( value : string ) {
328- return value . charCodeAt ( 10000 - 1 ) === value . codePointAt ( 10000 - 1 )
329- ? value . slice ( 0 , 10000 )
330- : value . slice ( 0 , 10000 - 1 ) ;
/**
 * Truncates `value` to at most `limit` UTF-16 code units without cutting a
 * surrogate pair in half.
 *
 * When the code unit at index `limit - 1` is the leading half of a surrogate
 * pair, the result is one code unit shorter than `limit` so that the returned
 * string never ends in a dangling high surrogate produced by the cut.
 *
 * @param value - The string to truncate.
 * @param limit - Maximum length of the result, in UTF-16 code units.
 * @returns `value` itself when it already fits, `''` when `limit < 1`,
 *   otherwise a prefix of `value` of length `limit` or `limit - 1`.
 */
function cropString(value: string, limit: number): string {
  if (limit < 1) return '';
  // Nothing to cut: short strings are returned untouched.
  if (value.length <= limit) return value;

  const lastIndex = limit - 1;
  // `charCodeAt` yields a single code unit while `codePointAt` yields the full
  // code point; the two differ only when `lastIndex` holds the high half of a
  // surrogate pair, in which case cutting at `limit` would split the pair.
  return value.charCodeAt(lastIndex) === value.codePointAt(lastIndex)
    ? value.slice(0, limit)
    : value.slice(0, lastIndex);
}
332+
/**
 * Returns a size-capped version of `value` for the variable-length types
 * handled here, and the value unchanged for every other type.
 *
 * - `'String'`: cropped with `cropString` to the cap.
 * - `'Binary'`: when `buffer.length` exceeds the cap, a new `Binary` is built
 *   from the first `cap` bytes with the same `sub_type`.
 * - `'Code'`: when `code.length` exceeds the cap, a new `Code` is built from
 *   the cropped source with the same `scope`.
 * - anything else: returned as-is.
 *
 * @param bsonType - The BSON type name already determined for `value`.
 * @param value - The sampled value to cap.
 * @returns The capped value, or `value` itself when no capping applies.
 */
function getCappedValue(bsonType: SchemaBSONType, value: BSONValue): BSONValue {
  // Upper bound on stored characters (String, Code) or bytes (Binary).
  const maxLength = 10000;

  if (bsonType === 'String') {
    return cropString(value as string, maxLength);
  }

  if (bsonType === 'Binary') {
    const binary = value as Binary;
    return binary.buffer.length > maxLength
      ? new Binary(binary.buffer.slice(0, maxLength), binary.sub_type)
      : binary;
  }

  if (bsonType === 'Code') {
    const code = value as Code;
    // Only re-wrap when something is actually cut off.
    return code.code.length > maxLength
      ? new Code(cropString(code.code, maxLength), code.scope)
      : code;
  }

  // All remaining types are stored untouched. Without this return the
  // function would yield `undefined` for them.
  return value;
}
332350
333351function computeHasDuplicatesForType ( type : SchemaAnalysisType , unique ?: number ) {
@@ -530,7 +548,7 @@ export class SchemaAnalyzer {
530548 }
531549
532550 type . values . pushSome (
533- type . name === 'String' ? cropStringAt10kCharacters ( value as string ) : value
551+ getCappedValue ( type . bsonType , value )
534552 ) ;
535553 }
536554 } ;
0 commit comments