Skip to content

Commit bd6210b

Browse files
author
moxious
authored
Merge pull request #8 from moxious/record-consumer
Batch data faking utilities for load testing
2 parents 3ce149f + 2bbd7a2 commit bd6210b

File tree

10 files changed

+208
-31
lines changed

10 files changed

+208
-31
lines changed

README.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ Usage: run-workload.js -p password
2020
[--ms milliseconds] how many milliseconds to test for
2121
[--workload /path/to/workload.json] probability table spec
2222
[--query CYPHER_QUERY] single cypher query to run
23+
[--schema /path/to/schema.json] schema for generated records (only used with
24+
--query)
25+
[--batchsize [1000]] number of records from schema to generate per batch
2326
[--concurrency c] how many concurrent queries to run (default: 10)
2427
[--checkpoint cn] how often to print results in milliseconds (default: 5000)
2528
[--fail-fast] if specified, the work will stop after encountering one
@@ -35,6 +38,9 @@ Options:
3538
-a address to connect to [default: "localhost"]
3639
-u username [default: "neo4j"]
3740
-p password [required]
41+
-d database
42+
--schema batch schema file
43+
--batchsize number of records per batch, usable only with schema
3844
-n number of hits on the database
3945
--ms number of milliseconds to execute
4046
--workload absolute path to JSON probability table/workload
@@ -69,12 +75,28 @@ program arguments, as in, `npm run graph-workload -- --n 20`
6975

7076
# Examples
7177

72-
Create a lot of nodes as fast as possible:
78+
## Create a lot of nodes as fast as possible:
7379

7480
```
7581
npm run graph-workload -- -a localhost -u neo4j -p admin --query 'Unwind range(1,1000000) as id create (n);' -n 50 --concurrency 4
7682
```
7783

84+
## Write custom data generated in batches
85+
86+
Fake/mock data can be generated with functions from [fakerjs](https://www.npmjs.com/package/faker).
87+
88+
Using this technique you can generate your own data and create
89+
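custom load patterns. Similar to other Neo4j utilities, each generated batch is passed to your query as the `$batch` parameter, and the tool automatically prepends `UNWIND $batch AS event` to the query, so your Cypher can refer to each generated record as `event`.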
90+
91+
```
92+
npm run graph-workload -- -a localhost -u neo4j -p admin \
93+
--query 'CREATE (t:Test) SET t += event' \
94+
--batchsize 1000 \
95+
--schema /absolute/path/to/schemas/myschema.json
96+
```
97+
98+
See `src/schemas/users.json` for an example of a schema you can use in this way. Keys are the field names to generate; values are the dotted names of the faker functions (for example `name.firstName`) used to populate each field.
99+
78100
# Neo4j 4.0 / Multidatabase
79101

80102
As of Neo4j 4.0, sessions support multi-database. Use the `-d` or `--database` argument to direct

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "graph-workload",
3-
"version": "0.4.1",
3+
"version": "0.5.0",
44
"description": "Tool for generating workloads running on Neo4j",
55
"main": "src/run-workload.js",
66
"scripts": {
@@ -18,14 +18,14 @@
1818
},
1919
"homepage": "https://github.com/moxious/graph-workload",
2020
"dependencies": {
21-
"bluebird": "^3.5.3",
21+
"bluebird": "^3.7.2",
2222
"dimsum": "^0.2.2",
2323
"es6-promise-pool": "^2.5.0",
2424
"faker": "^4.1.0",
2525
"generic-pool": "^3.6.1",
26-
"lodash": "^4.17.15",
26+
"lodash": "^4.17.19",
2727
"moment": "^2.24.0",
28-
"neo4j-driver": "^4.0.2",
28+
"neo4j-driver": "^4.1.1",
2929
"randomstring": "^1.1.5",
3030
"uuid": "^3.3.2",
3131
"yargs": "^12.0.5"

src/SimpleQueryStrategy.js

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const Strategy = require('./Strategy');
2+
const _ = require('lodash');
23

34
/**
45
* Represents a container class for a strategy that is just running some
@@ -10,6 +11,8 @@ module.exports = class SimpleQueryStrategy extends Strategy {
1011
this.name = props.name || 'SimpleQuery';
1112
this.query = props.query;
1213
this.params = props.params || {};
14+
this.generator = props.generator;
15+
this.batchSize = props.batchSize;
1316

1417
if (!(props.mode === 'READ') && !(props.mode === 'WRITE')) {
1518
throw new Error('Mode must be READ or WRITE');
@@ -20,7 +23,17 @@ module.exports = class SimpleQueryStrategy extends Strategy {
2023

2124
run() {
2225
const f = (s) => {
23-
const txRunner = tx => tx.run(this.query, this.params);
26+
const txRunner = tx => {
27+
if (this.generator) {
28+
const batch = this.generator.generate(this.batchSize);
29+
console.log('batchrun with ', batch.length, 'elements');
30+
return tx.run(
31+
`UNWIND $batch AS event ${this.query}`,
32+
_.merge({ batch }, this.params)
33+
);
34+
}
35+
return tx.run(this.query, this.params);
36+
};
2437

2538
if (this.props.mode === 'READ') {
2639
return s.readTransaction(txRunner);

src/datasource/MockData.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
const _ = require('lodash');
2+
const faker = require('faker');
3+
4+
class MockData {
    /**
     * Create a mock data source.
     *
     * After construction, `this.schema` maps each field name to a callable
     * generator function and `this.columns` lists the field names.
     *
     * @param {Object} schema a map of field names to dotted faker function
     * paths, for example `{ city: 'address.city' }`
     * @throws Error when the schema is empty or names something that is not
     * a faker function.
     */
    constructor(schema) {
        // A shallow copy is enough: values are required to be strings, and they
        // are replaced by functions on the copy, never on the caller's object.
        this.schema = Object.assign({}, schema);
        this.validateSchema();
        this.columns = Object.keys(this.schema);
    }

    /**
     * Generate a batch of fake records.
     *
     * @param {number} batchSize how many records to produce (default 1)
     * @returns {Object[]} an array of records; each record has one key per
     * schema field, holding a freshly generated value.
     */
    generate(batchSize=1) {
        const records = [];
        for (let i=0; i<batchSize; i++) {
            const record = {};
            this.columns.forEach(col => {
                record[col] = this.schema[col]();
            });
            records.push(record);
        }
        return records;
    }

    /**
     * Validate that for each schema field, there is a corresponding faker function.
     * For example, there is a faker.address.city() function. So if the user specifies
     * 'address.city' we will find that function.
     *
     * On success every entry of `this.schema` is replaced by the resolved
     * function, bound to the object it was found on.
     *
     * @throws Error when the schema is empty, a field has no (or a non-string)
     * specification, or the specification does not resolve to a function.
     */
    validateSchema() {
        if (!this.schema || Object.keys(this.schema).length === 0) {
            throw new Error('Empty or invalid schema specified');
        }

        Object.keys(this.schema).forEach(fieldName => {
            const val = this.schema[fieldName];
            if (!val) { throw new Error(`Field name ${fieldName} in schema has no specification`); }
            if (typeof val !== 'string') {
                throw new Error(`Invalid schema: specification for field ${fieldName} must be a string naming a faker function`);
            }

            // Walk the dotted path, remembering the object each step was found on.
            let owner = null;
            let f = faker;
            val.split('.').forEach(part => {
                owner = f;
                f = f[part];
                if (!f) {
                    throw new Error(`Invalid schema: Unknown function part '${part}' in field type ${fieldName}`);
                }
            });

            // A path that stops on a namespace (e.g. 'address') is truthy but not
            // callable; reject it here rather than failing later in generate().
            if (typeof f !== 'function') {
                throw new Error(`Invalid schema: '${val}' in field type ${fieldName} is not a function`);
            }

            // Bind to the owning namespace so functions that rely on `this`
            // keep working once they are stored in, and called from, the schema map.
            this.schema[fieldName] = f.bind(owner);
        });
    }
}
53+
54+
module.exports = MockData;

src/run-configuration.js

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
const terminateAfter = require('./termination-condition');
22
const ProbabilityTable = require('./stats/ProbabilityTable');
33
const _ = require('lodash');
4+
const MockData = require('./datasource/MockData');
45

56
const usageStr = `
67
Usage: run-workload.js -p password
@@ -10,6 +11,8 @@ Usage: run-workload.js -p password
1011
[--ms milliseconds] how many milliseconds to test for
1112
[--workload /path/to/workload.json] probability table spec
1213
[--query CYPHER_QUERY] single cypher query to run
14+
[--schema /path/to/schema.json] schema for generated records (only used with --query)
15+
[--batchsize [1000]] number of records from schema to generate per batch
1316
[--concurrency c] how many concurrent queries to run (default: 10)
1417
[--checkpoint cn] how often to print results in milliseconds (default: 5000)
1518
[--fail-fast] if specified, the work will stop after encountering one failure.
@@ -106,7 +109,9 @@ const generateFromArgs = (args) => {
106109
// We don't know where to connect...
107110
(!process.env.NEO4J_URI && !args.a) ||
108111
// Don't know what password to use...
109-
(!process.env.NEO4J_PASSWORD && !args.p)
112+
(!process.env.NEO4J_PASSWORD && !args.p) ||
113+
// You can't specify a schema if you don't have a query to consume it.
114+
(args.schema && !args.query)
110115
);
111116

112117
if (badlyConfigured) {
@@ -118,10 +123,15 @@ const generateFromArgs = (args) => {
118123
const concurrency = chooseConcurrency(args);
119124
const probabilityTable = chooseProbabilityTable(args);
120125

126+
const schema = args.schema ? require(args.schema) : null;
127+
const batchSize = args.batchSize ? args.batchSize : 1000;
121128
const failFast = ('fail-fast' in args) ? args['fail-fast'] : false;
122129

123130
// Merge sub-objects.
124131
const obj = _.merge({
132+
generator: schema ? new MockData(schema) : null,
133+
batchSize,
134+
runcheckpoint: args.runcheckpoint,
125135
checkpointFreq: args.checkpoint || process.env.CHECKPOINT_FREQUENCY || 5000,
126136
failFast,
127137
phase: 'NOT_STARTED',
@@ -149,12 +159,16 @@ module.exports = {
149159
.describe('u', 'username')
150160
.describe('p', 'password')
151161
.describe('d', 'database')
162+
.describe('runcheckpoint', 'whether to run db checkpointing or not')
163+
.describe('schema', 'batch schema file')
164+
.describe('batchsize', 'number of records per batch, usable only with schema')
152165
.describe('n', 'number of hits on the database')
153166
.describe('ms', 'number of milliseconds to execute')
154167
.describe('workload', 'absolute path to JSON probability table/workload')
155168
.describe('query', 'Cypher query to run')
156169
.default('concurrency', 10)
157170
.default('checkpoint', 5000)
171+
.default('runcheckpoint', false)
158172
.demandOption(['p'])
159173
.argv;
160174
},

src/schemas/users.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"id": "random.uuid",
3+
"firstName": "name.firstName",
4+
"lastName": "name.lastName",
5+
"email": "internet.email",
6+
"job": "name.jobTitle",
7+
"phone": "phone.phoneNumber",
8+
"favoriteColor": "commerce.color"
9+
}

src/strategies.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ const builder = (sessionPool, runConfig) => {
5555
if (label === 'custom') {
5656
chosenTable[label] = new SimpleQueryStrategy({
5757
sessionPool,
58+
generator: runConfig.generator,
59+
batchSize: runConfig.batchSize,
5860
query: runConfig.query,
5961
mode: runConfig.mode || 'WRITE',
6062
});

src/workload.js

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,19 @@ class Workload {
7676
const runStrategySetupPromises = Object.keys(this.strategyTable)
7777
.map(stratName => this.strategyTable[stratName].setup(this.driver));
7878

79-
return Promise.all(runStrategySetupPromises);
79+
return Promise.all(runStrategySetupPromises)
80+
.then(results => {
81+
if (this.runConfig.runcheckpoint) {
82+
const opts = {};
83+
if (this.runConfig.database) { opts.database = this.runConfig.database; }
84+
const session = this.driver.session(opts);
85+
console.log('Calling db.checkpoint method to flush prior to run');
86+
return session.run('CALL db.checkpoint()')
87+
.then(session.close);
88+
}
89+
90+
return results;
91+
});
8092
}
8193

8294
getRunConfiguration() { return this.runConfig; }

test/datasource/MockData_test.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
const expect = require('chai').expect;
2+
const mocks = require('../mocks');
3+
const chai = require('chai');
4+
const chaiAsPromised = require("chai-as-promised");
5+
chai.use(chaiAsPromised);
6+
7+
const MockData = require('../../src/datasource/MockData');
8+
9+
// Unit tests for MockData: schema validation at construction time and
// batch generation from a valid schema.
describe('Mock Data', function() {
    // Every value names a function under faker.address.
    const goodSchema = {
        streetAddress: 'address.streetAddress',
        city: 'address.city',
        state: 'address.state',
        country: 'address.country',
    };

    it('can create something with a good schema', () => {
        const md = new MockData(goodSchema);
        expect(md).to.be.ok;
    });

    it('refuses empty schema', () => {
        expect(() => new MockData()).to.throw(Error);
    });

    it('refuses wrong schema', () => {
        expect(() => new MockData({
            foo: 'i.do.not.exist',
        })).to.throw(Error);
    });

    it('can generate a batch', () => {
        const md = new MockData(goodSchema);
        const r = md.generate(10);

        // chai's type assertion takes the type name as a string.
        expect(r).to.be.an('array');
        expect(r.length).to.equal(10);

        // Every record must carry a truthy value for every schema field.
        r.forEach(record => {
            Object.keys(goodSchema).forEach(field => {
                expect(record[field]).to.be.ok;
            });
        });
    });
});

0 commit comments

Comments
 (0)