3
3
/* eslint-disable no-console */
4
4
/* eslint-disable @typescript-eslint/no-var-requires */
5
5
6
- const { stream : parseSchemaStream , schemaStats } = require ( '../' ) ;
6
+ const { SchemaAnalyzer, schemaStats : _schemaStats } = require ( '../' ) ;
7
+ const schemaStats = _schemaStats . default ;
7
8
8
9
const { MongoClient } = require ( 'mongodb' ) ;
9
- const sample = require ( 'mongodb-collection-sample' ) ;
10
10
const toNS = require ( 'mongodb-ns' ) ;
11
11
const yaml = require ( 'js-yaml' ) ;
12
12
const pkg = require ( '../package.json' ) ;
@@ -36,12 +36,6 @@ const argv = require('yargs')
36
36
describe : 'Print the computed schema to stdout.' ,
37
37
default : true
38
38
} )
39
- . option ( 'r' , {
40
- alias : 'repeat' ,
41
- type : 'number' ,
42
- describe : 'Repeat experiment n times.' ,
43
- default : 1
44
- } )
45
39
. option ( 's' , {
46
40
alias : 'stats' ,
47
41
type : 'boolean' ,
@@ -75,9 +69,9 @@ const argv = require('yargs')
75
69
. help ( 'h' )
76
70
. wrap ( 100 )
77
71
. example (
78
- '$0 localhost:27017 mongodb.fanclub --number 1000 --repeat 5 -- stats ' +
72
+ '$0 localhost:27017 mongodb.fanclub --number 1000 --stats ' +
79
73
'--no-output' , 'analyze 1000 docs from the mongodb.fanclub ' +
80
- 'collection, repeat 5 times and only show statistics.'
74
+ 'collection and only show statistics.'
81
75
)
82
76
. example (
83
77
'$0 localhost:27017 test.foo --format table' ,
@@ -129,13 +123,23 @@ function getTable(schema) {
129
123
}
130
124
131
125
const bar = new ProgressBar ( 'analyzing [:bar] :percent :etas ' , {
132
- total : argv . number * argv . repeat ,
126
+ total : argv . number ,
133
127
width : 60 ,
134
128
complete : '=' ,
135
129
incomplete : ' ' ,
136
130
clear : true
137
131
} ) ;
138
132
133
+ function sample ( collection , size = 1000 ) {
134
+ return collection . aggregate ( [ {
135
+ $sample : {
136
+ size
137
+ }
138
+ } ] , {
139
+ allowDiskUse : true
140
+ } ) ;
141
+ }
142
+
139
143
const client = new MongoClient ( uri ) ;
140
144
141
145
( async function main ( ) {
@@ -158,46 +162,30 @@ const client = new MongoClient(uri);
158
162
promoteValues : argv . promote
159
163
} ;
160
164
161
- let schema ;
162
165
const schemaOptions = {
163
166
storeValues : argv . values ,
164
167
semanticTypes : argv . semanticTypes
165
168
} ;
166
169
170
+ const analyzer = new SchemaAnalyzer ( schemaOptions ) ;
167
171
try {
168
- for ( let i = 0 ; i < argv . repeat ; i ++ ) {
169
- await new Promise ( ( resolve , reject ) => {
170
- const source = argv . sampling
171
- ? sample ( db , ns . collection , options )
172
- : db . collection ( ns . collection ) . find ( options . query , {
173
- promoteValues : options . promoteValues
174
- } ) . limit ( options . size ) . stream ( ) ;
175
-
176
- source
177
- . once ( 'data' , function ( ) {
178
- ts = new Date ( ) ;
179
- } )
180
- . pipe ( parseSchemaStream ( schemaOptions ) )
181
- . on ( 'progress' , function ( ) {
182
- bar . tick ( ) ;
183
- } )
184
- . on ( 'data' , function ( data ) {
185
- schema = data ;
186
- } )
187
- . on ( 'error' , function ( err ) {
188
- reject ( err ) ;
189
- } )
190
- . on ( 'end' , function ( ) {
191
- const duration = new Date ( ) - ts ;
192
- resolve ( duration ) ;
193
- } ) ;
194
- } ) ;
172
+ const input = argv . sampling
173
+ ? sample ( db . collection ( ns . collection ) , sampleSize )
174
+ : db . collection ( ns . collection ) . find ( options . query , {
175
+ promoteValues : options . promoteValues
176
+ } ) . limit ( options . size ) ;
177
+
178
+ for await ( const doc of input ) {
179
+ bar . tick ( ) ;
180
+ analyzer . analyzeDoc ( doc ) ;
195
181
}
196
182
} catch ( err ) {
197
183
console . error ( 'error:' , err . message ) ;
198
184
process . exit ( 1 ) ;
199
185
}
200
186
187
+ const schema = analyzer . getResult ( ) ;
188
+
201
189
if ( argv . output ) {
202
190
let output = '' ;
203
191
if ( argv . format === 'yaml' ) {
@@ -209,6 +197,7 @@ const client = new MongoClient(uri);
209
197
}
210
198
console . log ( output ) ;
211
199
}
200
+
212
201
if ( argv . stats ) {
213
202
let branchOutput = '[' ;
214
203
const branchingFactors = schemaStats . branch ( schema ) ;
@@ -218,11 +207,14 @@ const client = new MongoClient(uri);
218
207
branchOutput += branchingFactors . join ( ',' ) + ']' ;
219
208
}
220
209
221
- console . error ( 'execution count: ' + argv . repeat ) ;
222
210
console . error ( 'toplevel fields:' , schema . fields . length ) ;
223
211
console . error ( 'branching factors:' , branchOutput ) ;
224
212
console . error ( 'schema width: ' + schemaStats . width ( schema ) ) ;
225
213
console . error ( 'schema depth: ' + schemaStats . depth ( schema ) ) ;
226
214
}
215
+
216
+ console . dir ( analyzer . getSchemaPaths ( ) ) ;
217
+ console . dir ( analyzer . getSimplifiedSchema ( ) , { depth : null } ) ;
218
+
227
219
client . close ( ) ;
228
220
} ) ( ) ;
0 commit comments