Skip to content

Commit c7b07ea

Browse files
authored
chore: Fix bin/mongodb-schema script (#206)
* fix bin/mongodb-schema
* remove mongodb-collection-sample
1 parent c15e4b7 commit c7b07ea

File tree

4 files changed

+61
-565
lines changed

4 files changed

+61
-565
lines changed

bin/mongodb-schema

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
/* eslint-disable no-console */
44
/* eslint-disable @typescript-eslint/no-var-requires */
55

6-
const { stream: parseSchemaStream, schemaStats } = require('../');
6+
const { SchemaAnalyzer, schemaStats: _schemaStats } = require('../');
7+
const schemaStats = _schemaStats.default;
78

89
const { MongoClient } = require('mongodb');
9-
const sample = require('mongodb-collection-sample');
1010
const toNS = require('mongodb-ns');
1111
const yaml = require('js-yaml');
1212
const pkg = require('../package.json');
@@ -36,12 +36,6 @@ const argv = require('yargs')
3636
describe: 'Print the computed schema to stdout.',
3737
default: true
3838
})
39-
.option('r', {
40-
alias: 'repeat',
41-
type: 'number',
42-
describe: 'Repeat experiment n times.',
43-
default: 1
44-
})
4539
.option('s', {
4640
alias: 'stats',
4741
type: 'boolean',
@@ -75,9 +69,9 @@ const argv = require('yargs')
7569
.help('h')
7670
.wrap(100)
7771
.example(
78-
'$0 localhost:27017 mongodb.fanclub --number 1000 --repeat 5 --stats ' +
72+
'$0 localhost:27017 mongodb.fanclub --number 1000 --stats ' +
7973
'--no-output', 'analyze 1000 docs from the mongodb.fanclub ' +
80-
'collection, repeat 5 times and only show statistics.'
74+
'collection and only show statistics.'
8175
)
8276
.example(
8377
'$0 localhost:27017 test.foo --format table',
@@ -129,13 +123,23 @@ function getTable(schema) {
129123
}
130124

131125
const bar = new ProgressBar('analyzing [:bar] :percent :etas ', {
132-
total: argv.number * argv.repeat,
126+
total: argv.number,
133127
width: 60,
134128
complete: '=',
135129
incomplete: ' ',
136130
clear: true
137131
});
138132

133+
/**
 * Request a random sample of documents from a collection by running a
 * single-stage `$sample` aggregation.
 *
 * @param {object} collection - Object exposing `aggregate(pipeline, options)`.
 * @param {number} [size=1000] - Value passed as the `$sample` stage's `size`.
 * @param {object} [options={}] - Extra options forwarded to `aggregate`,
 *   merged over the `allowDiskUse: true` default (so a caller may override
 *   it). This lets the sampling path forward the same settings the `find`
 *   path uses, such as `promoteValues`.
 * @returns {*} Whatever `collection.aggregate` returns, unmodified.
 */
function sample(collection, size = 1000, options = {}) {
  const pipeline = [{ $sample: { size } }];

  // `allowDiskUse` stays on by default, exactly as before; caller-supplied
  // options are spread last so they take precedence when provided.
  return collection.aggregate(pipeline, { allowDiskUse: true, ...options });
}
142+
139143
const client = new MongoClient(uri);
140144

141145
(async function main() {
@@ -158,46 +162,30 @@ const client = new MongoClient(uri);
158162
promoteValues: argv.promote
159163
};
160164

161-
let schema;
162165
const schemaOptions = {
163166
storeValues: argv.values,
164167
semanticTypes: argv.semanticTypes
165168
};
166169

170+
const analyzer = new SchemaAnalyzer(schemaOptions);
167171
try {
168-
for (let i = 0; i < argv.repeat; i++) {
169-
await new Promise((resolve, reject) => {
170-
const source = argv.sampling
171-
? sample(db, ns.collection, options)
172-
: db.collection(ns.collection).find(options.query, {
173-
promoteValues: options.promoteValues
174-
}).limit(options.size).stream();
175-
176-
source
177-
.once('data', function() {
178-
ts = new Date();
179-
})
180-
.pipe(parseSchemaStream(schemaOptions))
181-
.on('progress', function() {
182-
bar.tick();
183-
})
184-
.on('data', function(data) {
185-
schema = data;
186-
})
187-
.on('error', function(err) {
188-
reject(err);
189-
})
190-
.on('end', function() {
191-
const duration = new Date() - ts;
192-
resolve(duration);
193-
});
194-
});
172+
const input = argv.sampling
173+
? sample(db.collection(ns.collection), sampleSize)
174+
: db.collection(ns.collection).find(options.query, {
175+
promoteValues: options.promoteValues
176+
}).limit(options.size);
177+
178+
for await (const doc of input) {
179+
bar.tick();
180+
analyzer.analyzeDoc(doc);
195181
}
196182
} catch (err) {
197183
console.error('error:', err.message);
198184
process.exit(1);
199185
}
200186

187+
const schema = analyzer.getResult();
188+
201189
if (argv.output) {
202190
let output = '';
203191
if (argv.format === 'yaml') {
@@ -209,6 +197,7 @@ const client = new MongoClient(uri);
209197
}
210198
console.log(output);
211199
}
200+
212201
if (argv.stats) {
213202
let branchOutput = '[';
214203
const branchingFactors = schemaStats.branch(schema);
@@ -218,11 +207,14 @@ const client = new MongoClient(uri);
218207
branchOutput += branchingFactors.join(',') + ']';
219208
}
220209

221-
console.error('execution count: ' + argv.repeat);
222210
console.error('toplevel fields:', schema.fields.length);
223211
console.error('branching factors:', branchOutput);
224212
console.error('schema width: ' + schemaStats.width(schema));
225213
console.error('schema depth: ' + schemaStats.depth(schema));
226214
}
215+
216+
console.dir(analyzer.getSchemaPaths());
217+
console.dir(analyzer.getSimplifiedSchema(), { depth: null });
218+
227219
client.close();
228220
})();

0 commit comments

Comments
 (0)