-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingestor.js
More file actions
executable file
·350 lines (336 loc) · 16.5 KB
/
ingestor.js
File metadata and controls
executable file
·350 lines (336 loc) · 16.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
const { program: ingestor } = require('commander');
const ingest = require('./ingest');
const ingest_mpu = require('./ingest_mpu');
const ingest_buckets = require('./ingest_buckets');
const ingest_bucketd = require('./ingest_bucketd');
const readall = require('./readall');
const deleteall = require('./deleteall');
const deleteversions = require('./deleteversions');
/**
 * Option-argument parser handed to commander for integer-valued options.
 *
 * @param {string} value - raw option argument taken from the command line
 * @param {*} dummyPrevious - previous value supplied by commander (ignored)
 * @returns {number} the base-10 integer parsed from `value`, or NaN when
 *   `value` does not start with a decimal integer
 */
function parseIntOpt(value, dummyPrevious) {
    const parsed = Number.parseInt(value, 10);
    return parsed;
}
/**
 * Option-argument parser handed to commander for floating-point options.
 *
 * @param {string} value - raw option argument taken from the command line
 * @param {*} dummyPrevious - previous value supplied by commander (ignored)
 * @returns {number} the floating-point number parsed from `value`, or NaN
 *   when `value` does not start with a number
 */
function parseFloatOpt(value, dummyPrevious) {
    // parseFloat() takes a single argument: unlike parseInt() it has no
    // radix parameter, so none is passed.
    return parseFloat(value);
}
// Register the program version string with commander.
ingestor.version('0.1');
// `ingest` sub-command: declares its command-line options, validates the
// mandatory/numeric ones, then passes the parsed options to ingest().
ingestor.command('ingest')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--count [n]', 'how many objects total', parseIntOpt, 100)
    .option('--size [n]', 'size of individual objects in bytes', parseIntOpt, 1000)
    .option('--prefix [prefix]', 'key prefix', '')
    .option('--prefix-exists', 'read/rewrite/delete existing keys from the specified prefix, created from a previous ingestor invocation with the same parameters', false)
    .option('--limit-per-delimiter [limit]',
            'max number of object to group in a single delimiter range',
            parseIntOpt, 0)
    .option('--rate-limit [n]',
            'limit rate of operations (in op/s)', parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .option('--clickhouse-endpoint [endpoint]', 'ClickHouse endpoint URL such as "http://localhost:8123"')
    .option('--one-object', 'hammer on a single object', false)
    .option('--delete-after-put', 'send deletes after objects are put', false)
    .option('--add-tags', 'add a random number of tags', false)
    .option('--hash-keys', 'hash keys after the prefix with a MD5 sum to make them unordered', false)
    .option('--append-key-hash', 'append a key MD5 hash after each key component, to lengthen the keys without changing their relative order', false)
    .option('--keys-from-file [path]', 'read keys from file')
    .option('--mpu-parts [nbparts]', 'create MPU objects with this many parts',
            parseIntOpt, 0)
    .option('--mpu-fuzz-repeat-complete-prob [probability]',
            'repeat an extra time the complete-mpu requests with this probability ' +
            '(it can lead to more than one extra complete-mpu for the same request)',
            parseFloatOpt, 0)
    .option('--random', 'randomize keys when reading from a file', false)
    .option('--verbose', 'increase verbosity', false)
    .option('--object-lock', 'lock ingested objects for one year in GOVERNANCE mode (the bucket must have object-lock enabled)', false)
    // Declared with a value placeholder and an integer parser so that
    // `--rewrite-percent 20` yields the number 20. Without the placeholder
    // commander treats the option as a boolean flag that takes no value.
    .option('--rewrite-percent <rwp>', 'probability percentage of rewrites over existing objects',
            parseIntOpt, 0)
    .option('--median-sequence-length <length>', 'with --random: introduce probabilistic sequentiality in accesses (read/write) where consecutive keys are accessed with the given median sequence length (in number of keys)',
            parseFloatOpt, 0)
    .action(options => {
        // Report every invalid option (not just the first one) before
        // printing the help text and exiting with status 1.
        if (!options.endpoint ||
            !options.bucket ||
            isNaN(options.workers) ||
            isNaN(options.count) || options.count <= 0 ||
            isNaN(options.size)) {
            if (!options.endpoint) {
                console.error('option --endpoint is missing');
            }
            if (!options.bucket) {
                console.error('option --bucket is missing');
            }
            if (isNaN(options.workers)) {
                console.error('value of option --workers must be an integer');
            }
            if (isNaN(options.count) || options.count <= 0) {
                console.error('value of option --count must be a strictly positive integer');
            }
            if (isNaN(options.size)) {
                console.error('value of option --size must be an integer');
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // This command declares no --read-percent/--delete-percent options:
        // both are passed to ingest() as 0.
        options.readPercent = 0;
        options.deletePercent = 0;
        // The process exit status is the code reported by ingest().
        ingest(options, code => process.exit(code));
    });
// `ingest_mpu` sub-command: option declarations, validation of the
// mandatory/numeric options, then hand-off to ingest_mpu().
ingestor.command('ingest_mpu')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--parts [nparts]', 'number of parts', parseIntOpt, 10)
    .option('--size [n]', 'size of individual parts in bytes', parseIntOpt, 1000)
    .option('--prefix [prefix]', 'key prefix', '')
    .option('--no-complete', 'do not complete the MPU', true)
    .option('--abort', 'abort the MPU instead of completing it', false)
    .action(options => {
        // Gather one message per invalid option, in reporting order.
        const problems = [];
        if (!options.endpoint) {
            problems.push('option --endpoint is missing');
        }
        if (!options.bucket) {
            problems.push('option --bucket is missing');
        }
        if (isNaN(options.workers)) {
            problems.push('value of option --workers must be an integer');
        }
        if (isNaN(options.parts)) {
            problems.push('value of option --parts must be an integer');
        }
        if (isNaN(options.size)) {
            problems.push('value of option --size must be an integer');
        }
        if (problems.length > 0) {
            for (const message of problems) {
                console.error(message);
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The process exit status is the code reported by ingest_mpu().
        ingest_mpu(options, exitCode => process.exit(exitCode));
    });
// `ingest_buckets` sub-command: option declarations, validation of the
// mandatory/numeric options, then hand-off to ingest_buckets().
ingestor.command('ingest_buckets')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--count [n]', 'how many objects total', parseIntOpt, 100)
    .option('--prefix [prefix]', 'bucket prefix', '')
    .option('--rate-limit [n]', 'limit rate of operations (in op/s)',
            parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .action(options => {
        // Gather one message per invalid option, in reporting order.
        const problems = [];
        if (!options.endpoint) {
            problems.push('option --endpoint is missing');
        }
        if (isNaN(options.workers)) {
            problems.push('value of option --workers must be an integer');
        }
        if (isNaN(options.count) || options.count <= 0) {
            problems.push('value of option --count must be a strictly positive integer');
        }
        if (problems.length > 0) {
            for (const message of problems) {
                console.error(message);
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The process exit status is the code reported by ingest_buckets().
        ingest_buckets(options, exitCode => process.exit(exitCode));
    });
// `ingest_bucketd` sub-command: declares its command-line options, validates
// the mandatory/numeric ones and the read/rewrite/delete percentages, then
// passes the parsed options to ingest_bucketd().
ingestor.command('ingest_bucketd')
    .option('--endpoint <endpoint...>', 'bucketd endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--count [n]', 'how many objects total', parseIntOpt, 100)
    .option('--prefix [prefix]', 'key prefix', '')
    .option('--prefix-exists', 'read/rewrite/delete existing keys from the specified prefix, created from a previous ingestor invocation with the same parameters', false)
    .option('--limit-per-delimiter [limit]',
            'max number of object to group in a single delimiter range',
            parseIntOpt, 0)
    .option('--rate-limit [n]',
            'limit rate of operations (in op/s)', parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .option('--clickhouse-endpoint [endpoint]', 'ClickHouse endpoint URL such as "http://localhost:8123"')
    .option('--one-object', 'hammer on a single object', false)
    .option('--hash-keys', 'hash keys after the prefix with a MD5 sum to make them unordered', false)
    .option('--append-key-hash', 'append a key MD5 hash after each key component, to lengthen the keys without changing their relative order', false)
    .option('--keys-from-file [path]', 'read keys from file')
    .option('--random', 'randomize keys when reading from a file', false)
    .option('--verbose', 'increase verbosity', false)
    .option('--versioned', 'use versioned PUT', false)
    .option('--read-percent <rp>', 'probability percentage of reads over existing objects',
            parseIntOpt, 0)
    .option('--rewrite-percent <rwp>', 'probability percentage of rewrites over existing objects',
            parseIntOpt, 0)
    .option('--delete-percent <dp>', 'probability percentage of deletes over existing objects',
            parseIntOpt, 0)
    .option('--median-sequence-length <length>', 'with --random: introduce probabilistic sequentiality in accesses (read/write) where consecutive keys are accessed with the given median sequence length (in number of keys)',
            parseFloatOpt, 0)
    .action(options => {
        // Report every invalid option (not just the first one) before
        // printing the help text and exiting with status 1.
        const badCount = isNaN(options.count) || options.count <= 0;
        if (!options.endpoint ||
            !options.bucket ||
            isNaN(options.workers) ||
            badCount ||
            isNaN(options.readPercent) ||
            isNaN(options.rewritePercent) ||
            isNaN(options.deletePercent)) {
            if (!options.endpoint) {
                console.error('option --endpoint is missing');
            }
            if (!options.bucket) {
                console.error('option --bucket is missing');
            }
            if (isNaN(options.workers)) {
                console.error('value of option --workers must be an integer');
            }
            if (badCount) {
                console.error('value of option --count must be a strictly positive integer');
            }
            // A non-numeric percentage parses to NaN, which would make the
            // sum below NaN and let it pass the `> 100` comparison.
            if (isNaN(options.readPercent)) {
                console.error('value of option --read-percent must be an integer');
            }
            if (isNaN(options.rewritePercent)) {
                console.error('value of option --rewrite-percent must be an integer');
            }
            if (isNaN(options.deletePercent)) {
                console.error('value of option --delete-percent must be an integer');
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The three percentages together must not exceed 100.
        const sumPercent = options.readPercent + options.rewritePercent + options.deletePercent;
        if (sumPercent > 100) {
            console.error(`sum of --read-percent, --rewrite-percent and --delete-percent exceed 100 (${sumPercent})`);
            process.exit(1);
        }
        // The process exit status is the code reported by ingest_bucketd().
        ingest_bucketd(options, code => process.exit(code));
    });
// `readall` sub-command: option declarations, validation of the
// mandatory/numeric options, then hand-off to readall().
ingestor.command('readall')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--prefix [prefix]', 'key prefix')
    .option('--limit-per-delimiter [limit]',
            'max number of object to group in a single delimiter range',
            parseIntOpt, 0)
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--count [n]', 'how many objects total', parseIntOpt, 100)
    .option('--rate-limit [n]', 'limit rate of operations (in op/s)',
            parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .option('--random',
            'randomize reads, while still reading all keys exactly once',
            false)
    .option('--keys-from-file [path]', 'read keys from file')
    .option('--median-sequence-length <length>', 'with --random: introduce probabilistic sequentiality in accesses (read/write) where consecutive keys are accessed with the given median sequence length (in number of keys)',
            parseFloatOpt, 0)
    .action(options => {
        // Gather one message per invalid option, in reporting order.
        const problems = [];
        if (!options.endpoint) {
            problems.push('option --endpoint is missing');
        }
        if (!options.bucket) {
            problems.push('option --bucket is missing');
        }
        if (isNaN(options.workers)) {
            problems.push('value of option --workers must be an integer');
        }
        if (isNaN(options.count) || options.count <= 0) {
            problems.push('value of option --count must be a strictly positive integer');
        }
        if (problems.length > 0) {
            for (const message of problems) {
                console.error(message);
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The process exit status is the code reported by readall().
        readall(options, exitCode => process.exit(exitCode));
    });
// `deleteall` sub-command: option declarations, validation of the
// mandatory/numeric options, then hand-off to deleteall().
ingestor.command('deleteall')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--prefix [prefix]', 'key prefix')
    .option('--limit-per-delimiter [limit]',
            'max number of object to group in a single delimiter range',
            parseIntOpt, 0)
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--count [n]', 'how many objects total', parseIntOpt, 100)
    .option('--rate-limit [n]', 'limit rate of operations (in op/s)',
            parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .option('--random',
            'randomize deletes, while still deleting all keys exactly once',
            false)
    .option('--keys-from-file [path]', 'read keys from file')
    .option('--median-sequence-length <length>', 'with --random: introduce probabilistic sequentiality in accesses (read/write) where consecutive keys are accessed with the given median sequence length (in number of keys)',
            parseFloatOpt, 0)
    .action(options => {
        // Gather one message per invalid option, in reporting order.
        const problems = [];
        if (!options.endpoint) {
            problems.push('option --endpoint is missing');
        }
        if (!options.bucket) {
            problems.push('option --bucket is missing');
        }
        if (isNaN(options.workers)) {
            problems.push('value of option --workers must be an integer');
        }
        if (isNaN(options.count) || options.count <= 0) {
            problems.push('value of option --count must be a strictly positive integer');
        }
        if (problems.length > 0) {
            for (const message of problems) {
                console.error(message);
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The process exit status is the code reported by deleteall().
        deleteall(options, exitCode => process.exit(exitCode));
    });
// `deleteversions` sub-command: declares its command-line options, validates
// the mandatory/numeric ones, then passes the parsed options to
// deleteversions().
ingestor.command('deleteversions')
    .option('--endpoint <endpoint...>', 'endpoint URL(s)')
    .option('--bucket <bucket>', 'bucket name')
    .option('--prefix [prefix]', 'key prefix')
    .option('--profile [profile]', 'aws/credentials profile', 'default')
    .option('--workers [n]', 'how many parallel workers', parseIntOpt, 10)
    .option('--rate-limit [n]',
            'limit rate of operations (in op/s)', parseIntOpt, 0)
    .option('--csv-stats [filename]', 'output file for stats in CSV format')
    .option('--csv-stats-interval [n]',
            'interval in seconds between each CSV stats output line',
            parseIntOpt, 10)
    .option('--random',
            'randomize deletes, while still deleting all keys exactly once',
            false)
    // NOTE(review): --batch-size has no parser, so its value reaches
    // deleteversions() as a string -- confirm that module converts it.
    .option('--batch-size [count]',
            'size of individual batches in number of objects (default is no batching)')
    // The second argument of .option() is the description and the default
    // value comes after it: give the flag a help text and an explicit
    // `false` default, like the other boolean flags of this file.
    .option('--bypass-governance-retention',
            'bypass GOVERNANCE mode retention when deleting object versions',
            false)
    .action(options => {
        // Report every invalid option (not just the first one) before
        // printing the help text and exiting with status 1.
        if (!options.endpoint ||
            !options.bucket ||
            isNaN(options.workers)) {
            if (!options.endpoint) {
                console.error('option --endpoint is missing');
            }
            if (!options.bucket) {
                console.error('option --bucket is missing');
            }
            if (isNaN(options.workers)) {
                console.error('value of option --workers must be an integer');
            }
            ingestor.outputHelp();
            process.exit(1);
        }
        // The process exit status is the code reported by deleteversions().
        deleteversions(options, code => process.exit(code));
    });
// Entry point: the first command-line argument must either name one of the
// registered sub-commands or be an option flag (such as --help or --version)
// that commander processes itself; anything else prints the help text and
// exits with status 1.
const commandName = process.argv[2];
// Use the public Command#name() accessor rather than the private `_name`
// field.
const isKnownCommand = ingestor.commands.some(cmd => cmd.name() === commandName);
const isOptionFlag = typeof commandName === 'string' && commandName.startsWith('-');
if (!isKnownCommand && !isOptionFlag) {
    ingestor.outputHelp();
    process.exit(1);
}
ingestor.parse(process.argv);