1- import JSON5 from 'json5' ;
21import { removeNewlines , removeTrailingComments } from '../utils' ;
32import { getSimplifiedSchema } from 'mongodb-schema' ;
43import type { SimplifiedSchema } from 'mongodb-schema' ;
54import { JSDOM , VirtualConsole } from 'jsdom' ;
6- import { createHash } from 'crypto' ;
7- import { BSON } from 'bson' ;
8-
/**
 * Contract for a processor that handles one kind of custom (non-JSON) literal.
 *
 * A processor first rewrites the raw text via {@link process}; afterwards,
 * parsed values are offered to {@link canRevive} and, when accepted, converted
 * through {@link revive}.
 */
abstract class CustomTypeProcessor {
  /**
   * Rewrites the given text.
   *
   * @param json - The raw text to transform.
   * @returns The transformed text.
   */
  public abstract process(json: string): string;

  /**
   * Reports whether this processor is able to revive the given value.
   *
   * @param value - A candidate value.
   * @returns `true` when `value` should be handed to {@link revive}.
   */
  public abstract canRevive(value: unknown): boolean;

  /**
   * Converts a value accepted by {@link canRevive} into its revived form.
   *
   * @param value - The value to convert.
   * @returns The revived value.
   */
  public abstract revive(value: unknown): unknown;
}
16-
/**
 * Base class for processors that recognise a custom literal with a regular
 * expression and swap it for a quoted placeholder string.
 *
 * The placeholder is `"<prefix>$1"`, where the prefix is `!rctp-` followed by
 * the base64 SHA-256 digest of the pattern source, so each pattern gets its
 * own prefix. Reviving strips the prefix and delegates the remainder to
 * {@link reviveCore}.
 */
abstract class RegexCustomTypeProcessor extends CustomTypeProcessor {
  private regex: RegExp;
  private replacementPrefix: string;

  /**
   * @param regex - Pattern source; compiled with the global flag. Its first
   *   capture group (if any) becomes the placeholder payload.
   */
  protected constructor(regex: string) {
    super();
    const patternDigest = createHash('sha256').update(regex).digest('base64');
    this.replacementPrefix = `!rctp-${patternDigest}`;
    this.regex = new RegExp(regex, 'g');
  }

  /**
   * Builds the final value from the placeholder payload (prefix removed).
   *
   * @param value - The text that followed the prefix in the placeholder.
   */
  protected abstract reviveCore(value: string): unknown;

  /**
   * Replaces every match of the pattern with the quoted placeholder.
   *
   * @param json - The raw text to transform.
   * @returns The text with all matches replaced.
   */
  public process(json: string): string {
    const placeholder = `"${this.replacementPrefix}$1"`;
    return json.replace(this.regex, placeholder);
  }

  /**
   * @param value - A candidate value.
   * @returns `true` only for strings that begin with this processor's prefix.
   */
  public canRevive(value: unknown): boolean {
    if (typeof value !== 'string') {
      return false;
    }

    return value.startsWith(this.replacementPrefix);
  }

  /**
   * Strips the prefix from the placeholder and revives the remaining payload.
   *
   * @param value - A placeholder string produced by {@link process}.
   * @returns Whatever {@link reviveCore} produces for the payload.
   */
  public revive(value: string): unknown {
    const payload = value.replace(this.replacementPrefix, '');
    return this.reviveCore(payload);
  }
}
43-
/** Handles `ISODate("...")` literals, reviving them as `Date` instances. */
class IsoDateProcessor extends RegexCustomTypeProcessor {
  constructor() {
    super('\\bISODate\\(\\s*"([^"]*)"\\s*\\)');
  }

  /**
   * Parses the text found inside `ISODate("...")`.
   *
   * @param value - The date text captured from the literal.
   * @returns The date built from the normalized representation when it lies
   *   within years 0000-9999; otherwise `new Date(value)`.
   */
  public reviveCore(value: string): Date | undefined {
    const isoDatePattern =
      /^(?<Y>\d{4})-?(?<M>\d{2})-?(?<D>\d{2})([T ](?<h>\d{2})(:?(?<m>\d{2})(:?((?<s>\d{2})(\.(?<ms>\d+))?))?)?(?<tz>Z|([+-])(\d{2}):?(\d{2})?)?)?$/;
    const parsed = isoDatePattern.exec(value);

    if (parsed === null || parsed.groups === undefined) {
      return new Date(value);
    }

    // ISO-8601 permits compact forms such as '20201002T102950Z' (no ':' or
    // '-'), which `new Date()` rejects, so rebuild the fully punctuated form.
    const { Y, M, D, h, m, s, ms, tz } = parsed.groups;
    const datePart = `${Y}-${M}-${D}`;
    const timePart = `${h || '00'}:${m || '00'}:${s || '00'}.${ms || '000'}`;
    const candidate = new Date(`${datePart}T${timePart}${tz || 'Z'}`);

    // Accept only 0000-01-01T00:00:00.000Z through 9999-12-31T23:59:59.999Z.
    const earliestMs = -62167219200000;
    const latestMs = 253402300799999;
    const epochMs = candidate.getTime();
    const inRange = epochMs >= earliestMs && epochMs <= latestMs;

    return inRange ? candidate : new Date(value);
  }
}
73-
/** Handles `Date("...")` and `new Date("...")` literals. */
class DateProcessor extends RegexCustomTypeProcessor {
  /** Pattern source; the capture group is the quoted date text. */
  private static readonly pattern = '\\b(?:new )?Date\\(\\s*"([^"]*)"\\s*\\)';

  constructor() {
    super(DateProcessor.pattern);
  }

  /**
   * @param value - The date text captured from the literal.
   * @returns The result of passing `value` to the `Date` constructor.
   */
  public reviveCore(value: string): Date {
    const revived = new Date(value);
    return revived;
  }
}
83-
/** Handles `ObjectId("...")` literals. */
class ObjectIdProcessor extends RegexCustomTypeProcessor {
  /** Pattern source; the capture group is the quoted id text. */
  private static readonly pattern = '\\bObjectId\\(\\s*"([^"]*)"\\s*\\)';

  constructor() {
    super(ObjectIdProcessor.pattern);
  }

  /**
   * @param value - The id text captured from the literal.
   * @returns A `BSON.ObjectId` constructed from `value`.
   */
  public reviveCore(value: string): unknown {
    const objectId = new BSON.ObjectId(value);
    return objectId;
  }
}
93-
/** Handles `UUID("...")` literals. */
class UUIDProcessor extends RegexCustomTypeProcessor {
  /** Pattern source; the capture group is the quoted UUID text. */
  private static readonly pattern = '\\bUUID\\(\\s*"([^"]*)"\\s*\\)';

  constructor() {
    super(UUIDProcessor.pattern);
  }

  /**
   * @param value - The UUID text captured from the literal.
   * @returns A `BSON.UUID` constructed from `value`.
   */
  public reviveCore(value: string): unknown {
    const uuid = new BSON.UUID(value);
    return uuid;
  }
}
103-
/**
 * Handles `NumberDecimal(...)` and `Decimal128(...)` literals, with the
 * argument either quoted or unquoted.
 */
class NumberDecimalProcessor extends RegexCustomTypeProcessor {
  constructor() {
    // The capture excludes ')' and is lazy. A greedy `[^"]*` would, for an
    // unquoted argument, run past the closing parenthesis up to the last ')'
    // before the next quote (e.g. `NumberDecimal(1.5), b: NumberInt(3)` would
    // capture `1.5), b: NumberInt(3`). Laziness also keeps trailing whitespace
    // before ')' out of the captured value.
    super('\\b(?:NumberDecimal|Decimal128)\\(\\s*"?([^")]*?)"?\\s*\\)');
  }

  /**
   * @param value - The decimal text captured from the literal.
   * @returns A `BSON.Decimal128` constructed from `value`.
   */
  public reviveCore(value: string): unknown {
    return new BSON.Decimal128(value);
  }
}
113-
/** Handles the bare word `undefined`. */
class UndefinedProcessor extends RegexCustomTypeProcessor {
  /** Pattern source; matches `undefined` as a whole word. */
  private static readonly pattern = '\\bundefined\\b';

  constructor() {
    super(UndefinedProcessor.pattern);
  }

  /**
   * Ignores the placeholder payload.
   *
   * @returns Always `undefined`.
   */
  public reviveCore(): unknown {
    const revived = undefined;
    return revived;
  }
}
123-
/** Handles the bare word `null`. */
class NullProcessor extends RegexCustomTypeProcessor {
  /** Pattern source; matches `null` as a whole word. */
  private static readonly pattern = '\\bnull\\b';

  constructor() {
    super(NullProcessor.pattern);
  }

  /**
   * Ignores the placeholder payload.
   *
   * @returns Always `null`.
   */
  public reviveCore(): unknown {
    const revived = null;
    return revived;
  }
}
133-
/**
 * Handles `BinData(<subType>, "<base64>")` literals (optionally preceded by
 * `new`), reviving them as `BSON.Binary` values.
 *
 * Matches are replaced by a quoted placeholder of the form
 * `"<prefix><subType>-<base64>"`, where the prefix is `!bdp-` followed by the
 * base64 SHA-256 digest of the pattern source.
 */
class BinDataProcessor extends CustomTypeProcessor {
  // Whitespace around the comma is tolerated so both `BinData(0,"…")` and
  // `BinData(0, "…")` are recognised.
  private regex = /\b(?:new\s+)?BinData\(\s*(\d*)\s*,\s*"([^")]*)"\s*\)/g;
  private replacementPrefix =
    '!bdp-' + createHash('sha256').update(this.regex.source).digest('base64');

  /**
   * Replaces every `BinData(...)` literal with its quoted placeholder.
   *
   * @param json - The raw text to transform.
   * @returns The text with all matches replaced.
   */
  public process(json: string): string {
    return json.replace(this.regex, `"${this.replacementPrefix}$1-$2"`);
  }

  /**
   * @param value - A candidate value.
   * @returns `true` only for strings that begin with this processor's prefix.
   */
  public canRevive(value: unknown): boolean {
    return (
      typeof value === 'string' && value.startsWith(this.replacementPrefix)
    );
  }

  /**
   * Converts a placeholder back into a binary value.
   *
   * @param value - A placeholder string produced by {@link process}.
   * @returns The `BSON.Binary` created from the base64 payload and subtype.
   * @throws Error when the payload is not of the form `<digits>-<base64>`.
   */
  public revive(value: string): unknown {
    const payload = value.replace(this.replacementPrefix, '');

    // Anchored with a multi-digit subtype: an unanchored single `\d` would
    // read a payload such as `128-…` as subtype 8.
    const match = /^(?<subType>\d+)-(?<base64>.*)/.exec(payload);

    if (match && match.groups) {
      const { subType, base64 } = match.groups;
      return BSON.Binary.createFromBase64(base64, parseInt(subType, 10));
    }

    throw new Error(`Invalid BinData format: ${value}`);
  }
}
162-
/**
 * Handles `NumberInt(...)` and `Int32(...)` literals, with the argument
 * either quoted or unquoted.
 */
class NumberIntProcessor extends RegexCustomTypeProcessor {
  constructor() {
    // An optional leading '-' is accepted so negative values such as
    // `NumberInt(-5)` are recognised instead of being left unprocessed.
    super('\\b(?:NumberInt|Int32)\\(\\s*"?(-?\\d*)"?\\s*\\)');
  }

  /**
   * @param value - The integer text captured from the literal.
   * @returns A `BSON.Int32` constructed from `value`.
   */
  public reviveCore(value: string): unknown {
    return new BSON.Int32(value);
  }
}
172-
/**
 * Handles `NumberLong(...)` literals, with the argument either quoted or
 * unquoted.
 */
class NumberLongProcessor extends RegexCustomTypeProcessor {
  constructor() {
    // An optional leading '-' is accepted so negative values such as
    // `NumberLong(-1)` are recognised instead of being left unprocessed.
    super('\\bNumberLong\\(\\s*"?(-?[\\d.]*)"?\\s*\\)');
  }

  /**
   * @param value - The numeric text captured from the literal.
   * @returns A `BSON.Long` constructed from `value`.
   */
  public reviveCore(value: string): unknown {
    return new BSON.Long(value);
  }
}
182-
/** Handles `Timestamp(<t>, <i>)` literals, reviving them as `BSON.Timestamp`. */
class TimestampProcessor extends RegexCustomTypeProcessor {
  constructor() {
    super('\\bTimestamp\\(\\s*(\\d*,\\s*\\d*)\\s*\\)');
  }

  /**
   * Splits the captured `<t>,<i>` text and builds the timestamp.
   *
   * @param value - The text captured between the parentheses.
   * @returns A `BSON.Timestamp` with the parsed `t` and `i` components.
   * @throws Error when `value` is not of the form `<digits>,<digits>`.
   */
  public reviveCore(value: string): unknown {
    // Must accept everything the constructor pattern captures, i.e. any amount
    // of whitespace (including none) after the comma. Requiring exactly one
    // whitespace character would make `Timestamp(1,2)` throw here.
    const match = /^(?<t>\d*),\s*(?<i>\d*)$/.exec(value);

    if (match && match.groups) {
      const { t, i } = match.groups;
      return new BSON.Timestamp({ t: parseInt(t, 10), i: parseInt(i, 10) });
    }

    throw new Error(`Invalid Timestamp format: ${value}`);
  }
}
5+ import parse , { ParseMode } from '@mongodb-js/shell-bson-parser' ;
1996
2007export class DocsCrawler {
2018 constructor ( private readonly url : string ) {
@@ -208,19 +15,6 @@ export class DocsCrawler {
20815 private virtualConsole : VirtualConsole ;
20916
21017 private fuzzyParse ( json : string ) : unknown [ ] | undefined {
211- try {
212- const result = JSON5 . parse ( json ) ;
213- if ( Array . isArray ( result ) ) {
214- return result ;
215- }
216-
217- if ( typeof result === 'object' ) {
218- return [ result ] ;
219- }
220- } catch {
221- // Ignore parse errors
222- }
223-
22418 // Sometimes the snippet will end with ellipsis instead of json
22519 json = json . replace ( / \. \. \. $ / g, '' ) ;
22620
@@ -235,45 +29,30 @@ export class DocsCrawler {
23529 // Insert commas between array elements
23630 json = json . replace ( / \} \s * \{ / g, '},{' ) ;
23731
238- const processors : CustomTypeProcessor [ ] = [
239- new IsoDateProcessor ( ) ,
240- new DateProcessor ( ) ,
241- new ObjectIdProcessor ( ) ,
242- new UUIDProcessor ( ) ,
243- new NumberDecimalProcessor ( ) ,
244- new UndefinedProcessor ( ) ,
245- new NullProcessor ( ) ,
246- new BinDataProcessor ( ) ,
247- new NumberIntProcessor ( ) ,
248- new NumberLongProcessor ( ) ,
249- new TimestampProcessor ( ) ,
250- ] ;
32+ try {
33+ let result = parse ( json , { mode : ParseMode . Loose } ) ;
25134
252- for ( const processor of processors ) {
253- json = processor . process ( json ) ;
254- }
35+ if ( ! Array . isArray ( result ) ) {
36+ result = [ result ] ;
37+ }
25538
256- try {
257- // The docs use quoted/unquoted shell syntax inconsistently, so use JSON5 instead of regular JSON
258- // to parse the documents.
259- const result = JSON5 . parse ( json , ( key , value ) => {
260- for ( const processor of processors ) {
261- if ( processor . canRevive ( value ) ) {
262- return processor . revive ( value ) ;
263- }
39+ if ( result . length > 0 ) {
40+ const firstDoc = result [ 0 ] ;
41+ if (
42+ typeof firstDoc !== 'object' ||
43+ Object . keys ( firstDoc as object ) . find ( ( k ) => k . startsWith ( '$' ) )
44+ ) {
45+ // If the array doesn't contain objects or the object keys start with $,
46+ // we're likely dealing with an aggregation pipeline rather than an insertion code.
47+ // return undefined and let the caller move on to the next section.
48+ throw new Error (
49+ 'Result is not an array of documents or contains aggregation pipeline stages' ,
50+ ) ;
26451 }
26552
266- return value ;
267- } ) ;
268-
269- if ( Array . isArray ( result ) ) {
27053 return result ;
27154 }
27255
273- if ( typeof result === 'object' ) {
274- return [ result ] ;
275- }
276-
27756 // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
27857 throw new Error ( `Unexpected json output: ${ result } ` ) ;
27958 } catch ( e ) {
0 commit comments