Skip to content

Commit 1a7d05b

Browse files
authored
Merge pull request #24 from bvaughn/performance-tuning-2
Performance tuning
2 parents a4199cd + bc5e29d commit 1a7d05b

File tree

12 files changed

+358
-253
lines changed

12 files changed

+358
-253
lines changed

benchmarks/regression-test.js

Lines changed: 70 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,67 @@ var filter = process.argv.length === 3
2424
var benchmarks = [];
2525

2626
function setupBenchmarks(corpus) {
27+
// Index strategies
2728
initBenchmark({
2829
corpus,
29-
indexStrategy: 'PrefixIndexStrategy',
30-
searchIndex: 'UnorderedSearchIndex'
30+
indexStrategy: 'PrefixIndexStrategy'
3131
});
3232
initBenchmark({
3333
corpus,
34-
indexStrategy: 'AllSubstringsIndexStrategy',
35-
searchIndex: 'UnorderedSearchIndex'
34+
indexStrategy: 'AllSubstringsIndexStrategy'
3635
});
3736
initBenchmark({
3837
corpus,
39-
indexStrategy: 'ExactWordIndexStrategy',
38+
indexStrategy: 'PrefixIndexStrategy'
39+
});
40+
41+
// Search indices
42+
initBenchmark({
43+
corpus,
4044
searchIndex: 'TfIdfSearchIndex'
4145
});
4246
initBenchmark({
4347
corpus,
44-
indexStrategy: 'ExactWordIndexStrategy',
4548
searchIndex: 'UnorderedSearchIndex'
4649
});
4750

51+
// Tokenizers
52+
initBenchmark({
53+
corpus,
54+
tokenizer: 'SimpleTokenizer'
55+
});
56+
initBenchmark({
57+
corpus,
58+
tokenizer: 'StemmingTokenizer'
59+
});
60+
initBenchmark({
61+
corpus,
62+
tokenizer: 'StopWordsTokenizer'
63+
});
64+
4865
runNextTest();
4966
}
5067

68+
function identity(text) {
69+
return text;
70+
}
71+
72+
function createTokenizer(module, tokenizer) {
73+
switch (tokenizer) {
74+
case 'SimpleTokenizer':
75+
return new module.SimpleTokenizer();
76+
case 'StemmingTokenizer':
77+
return new module.StemmingTokenizer(
78+
identity,
79+
new module.SimpleTokenizer()
80+
);
81+
case 'StopWordsTokenizer':
82+
return new module.StopWordsTokenizer(
83+
new module.SimpleTokenizer()
84+
);
85+
}
86+
}
87+
5188
function createBenchmark() {
5289
return new Benchmark.Suite()
5390
.on('cycle', (event) => {
@@ -68,47 +105,48 @@ function runNextTest() {
68105

69106
function initBenchmark({
70107
corpus,
71-
indexStrategy,
72-
searchIndex
108+
indexStrategy = 'PrefixIndexStrategy',
109+
searchIndex = 'UnorderedSearchIndex',
110+
tokenizer = 'SimpleTokenizer'
73111
}) {
74-
if (
75-
filter &&
76-
!indexStrategy.match(filter) &&
77-
!searchIndex.match(filter)
78-
) {
79-
return;
80-
}
81-
82-
console.log(`Initializing benchmark\t${indexStrategy}\t${searchIndex}`);
83-
84112
initBenchmarkForCreateIndex({
85113
corpus,
86114
indexStrategy,
87-
searchIndex
115+
searchIndex,
116+
tokenizer
88117
});
89118
initBenchmarkForSearch({
90119
corpus,
91120
indexStrategy,
92-
searchIndex
121+
searchIndex,
122+
tokenizer
93123
});
94124
}
95125

96126
function initBenchmarkForCreateIndex({
97127
corpus,
98128
indexStrategy,
99-
searchIndex
129+
searchIndex,
130+
tokenizer
100131
}) {
132+
var label = `index\t${indexStrategy}\t${searchIndex}\t${tokenizer}`;
133+
134+
if (filter && !label.match(filter)) {
135+
return;
136+
}
137+
101138
var benchmark = createBenchmark();
102139

103140
versions.forEach(version => {
104141
var IndexStrategy = version.module[indexStrategy];
105142
var Search = version.module.Search;
106143
var SearchIndex = version.module[searchIndex];
107144

108-
benchmark.add(`[${version.label}]\tCreate index\t${searchIndex}\t${indexStrategy}`, () => {
145+
benchmark.add(`[${version.label}]\t${label}`, () => {
109146
var search = new Search('isbn');
110147
search.indexStrategy = new IndexStrategy();
111148
search.searchIndex = new SearchIndex('isbn');
149+
search.tokenizer = createTokenizer(version.module, tokenizer);
112150
search.addIndex('title');
113151
search.addIndex('author');
114152
search.addDocuments(corpus);
@@ -121,8 +159,15 @@ function initBenchmarkForCreateIndex({
121159
function initBenchmarkForSearch({
122160
corpus,
123161
indexStrategy,
124-
searchIndex
162+
searchIndex,
163+
tokenizer
125164
}) {
165+
var label = `search\t${indexStrategy}\t${searchIndex}\t${tokenizer}`;
166+
167+
if (filter && !label.match(filter)) {
168+
return;
169+
}
170+
126171
var searchTerms = ['letter', 'world', 'wife', 'love', 'foobar'];
127172
var searchTermsLength = searchTerms.length;
128173

@@ -136,11 +181,12 @@ function initBenchmarkForSearch({
136181
var search = new Search('isbn');
137182
search.indexStrategy = new IndexStrategy();
138183
search.searchIndex = new SearchIndex('isbn');
184+
search.tokenizer = createTokenizer(version.module, tokenizer);
139185
search.addIndex('title');
140186
search.addIndex('author');
141187
search.addDocuments(corpus);
142188

143-
benchmark.add(`[${version.label}]\tSearch strings\t${searchIndex}\t${indexStrategy}`, () => {
189+
benchmark.add(`[${version.label}]\t${label}`, () => {
144190
for (var i = 0, length = searchTermsLength; i < length; i++) {
145191
search.search(searchTerms[i]);
146192
}

benchmarks/yarn.lock

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ benchmark@^2.1.3:
1313
lodash "^4.17.3"
1414
platform "^1.3.3"
1515

16+
bindings@1.2.x:
17+
version "1.2.1"
18+
resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.2.1.tgz#14ad6113812d2d37d72e67b4cacb4bb726505f11"
19+
1620
js-search@^1.3.5:
1721
version "1.3.5"
1822
resolved "https://registry.yarnpkg.com/js-search/-/js-search-1.3.5.tgz#b489c05aabc4b22cdb75484fe75ac49fa94feff2"
@@ -25,6 +29,17 @@ lunr@^0.7.2:
2529
version "0.7.2"
2630
resolved "https://registry.yarnpkg.com/lunr/-/lunr-0.7.2.tgz#79a30e932e216cba163541ee37a3607c12cd7281"
2731

32+
microtime@^2.1.2:
33+
version "2.1.2"
34+
resolved "https://registry.yarnpkg.com/microtime/-/microtime-2.1.2.tgz#9c955d0781961ab13a1b6f9a82b080f5d7ecd83b"
35+
dependencies:
36+
bindings "1.2.x"
37+
nan "2.4.x"
38+
39+
nan@2.4.x:
40+
version "2.4.0"
41+
resolved "https://registry.yarnpkg.com/nan/-/nan-2.4.0.tgz#fb3c59d45fe4effe215f0b890f8adf6eb32d2232"
42+
2843
platform@^1.3.3:
2944
version "1.3.3"
3045
resolved "https://registry.yarnpkg.com/platform/-/platform-1.3.3.tgz#646c77011899870b6a0903e75e997e8e51da7461"

source/IndexStrategy/AllSubstringsIndexStrategy.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,14 @@ export class AllSubstringsIndexStrategy implements IIndexStrategy {
1212
*/
1313
expandToken(token : string) : Array<string> {
1414
var expandedTokens = [];
15+
var string;
1516

1617
for (var i = 0, length = token.length; i < length; ++i) {
18+
string = '';
19+
1720
for (var j = i; j < length; ++j) {
18-
expandedTokens.push(token.substring(i, j + 1));
21+
string += token.charAt(j);
22+
expandedTokens.push(string);
1923
}
2024
}
2125

source/IndexStrategy/PrefixIndexStrategy.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ export class PrefixIndexStrategy implements IIndexStrategy {
1212
*/
1313
expandToken(token : string) : Array<string> {
1414
var expandedTokens = [];
15+
var string = '';
1516

1617
for (var i = 0, length = token.length; i < length; ++i) {
17-
expandedTokens.push(token.substring(0, i + 1));
18+
string += token.charAt(i);
19+
expandedTokens.push(string);
1820
}
1921

2022
return expandedTokens;

source/Search.js

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export class Search {
2525
/**
2626
* Array containing either a property name or a path (list of property names) to a nested value
2727
*/
28-
searchableFields : Array<string|Array<string>>;
28+
_searchableFields : Array<string|Array<string>>;
2929

3030
_searchIndex : ISearchIndex;
3131
_tokenizer : ITokenizer;
@@ -46,7 +46,7 @@ export class Search {
4646
this._tokenizer = new SimpleTokenizer();
4747

4848
this._documents = [];
49-
this.searchableFields = [];
49+
this._searchableFields = [];
5050
}
5151

5252
/**
@@ -128,7 +128,7 @@ export class Search {
128128
*/
129129
addDocuments(documents : Array<Object>) : void {
130130
this._documents = this._documents.concat(documents);
131-
this.indexDocuments_(documents, this.searchableFields);
131+
this.indexDocuments_(documents, this._searchableFields);
132132
}
133133

134134
/**
@@ -137,7 +137,7 @@ export class Search {
137137
* @param field Searchable field or field path. Pass a string to index a top-level field and an array of strings for nested fields.
138138
*/
139139
addIndex(field : string|Array<string>) {
140-
this.searchableFields.push(field);
140+
this._searchableFields.push(field);
141141
this.indexDocuments_(this._documents, [field]);
142142
}
143143

@@ -154,24 +154,30 @@ export class Search {
154154

155155
/**
156156
* @param documents
157-
* @param searchableFields Array containing property names and paths (lists of property names) to nested values
157+
* @param _searchableFields Array containing property names and paths (lists of property names) to nested values
158158
* @private
159159
*/
160-
indexDocuments_(documents : Array<Object>, searchableFields : Array<string|Array<string>>) : void {
160+
indexDocuments_(documents : Array<Object>, _searchableFields : Array<string|Array<string>>) : void {
161161
this._initialized = true;
162162

163+
var indexStrategy = this._indexStrategy;
164+
var sanitizer = this._sanitizer;
165+
var searchIndex = this._searchIndex;
166+
var tokenizer = this._tokenizer;
167+
var uidFieldName = this._uidFieldName;
168+
163169
for (var di = 0, numDocuments = documents.length; di < numDocuments; di++) {
164-
var document : Object = documents[di];
165-
var uid : string = document[this._uidFieldName];
170+
var doc = documents[di];
171+
var uid = doc[uidFieldName];
166172

167-
for (var sfi = 0, numSearchableFields = searchableFields.length; sfi < numSearchableFields; sfi++) {
168-
var fieldValue : any;
169-
var searchableField : string|Array<string> = searchableFields[sfi];
173+
for (var sfi = 0, numSearchableFields = _searchableFields.length; sfi < numSearchableFields; sfi++) {
174+
var fieldValue;
175+
var searchableField = _searchableFields[sfi];
170176

171177
if (searchableField instanceof Array) {
172-
fieldValue = Search.getNestedFieldValue(document, searchableField);
178+
fieldValue = getNestedFieldValue(doc, searchableField);
173179
} else {
174-
fieldValue = document[searchableField];
180+
fieldValue = doc[searchableField];
175181
}
176182

177183
if (
@@ -183,45 +189,45 @@ export class Search {
183189
}
184190

185191
if (typeof fieldValue === 'string') {
186-
var fieldTokens : Array<string> = this._tokenizer.tokenize(this._sanitizer.sanitize(fieldValue));
192+
var fieldTokens = tokenizer.tokenize(sanitizer.sanitize(fieldValue));
187193

188194
for (var fti = 0, numFieldValues = fieldTokens.length; fti < numFieldValues; fti++) {
189-
var fieldToken : string = fieldTokens[fti];
190-
var expandedTokens : Array<string> = this._indexStrategy.expandToken(fieldToken);
195+
var fieldToken = fieldTokens[fti];
196+
var expandedTokens = indexStrategy.expandToken(fieldToken);
191197

192198
for (var eti = 0, nummExpandedTokens = expandedTokens.length; eti < nummExpandedTokens; eti++) {
193199
var expandedToken = expandedTokens[eti];
194200

195-
this._searchIndex.indexDocument(expandedToken, uid, document);
201+
searchIndex.indexDocument(expandedToken, uid, doc);
196202
}
197203
}
198204
}
199205
}
200206
}
201207
}
208+
}
202209

203-
/**
204-
* Find and return a nested object value.
205-
*
206-
* @param object to crawl
207-
* @param path Property path
208-
* @returns {any}
209-
*/
210-
static getNestedFieldValue(object : Object, path : Array<string>) {
211-
path = path || [];
212-
object = object || {};
210+
/**
211+
* Find and return a nested object value.
212+
*
213+
* @param object to crawl
214+
* @param path Property path
215+
* @returns {any}
216+
*/
217+
function getNestedFieldValue(object : Object, path : Array<string>) {
218+
path = path || [];
219+
object = object || {};
213220

214-
var value = object;
221+
var value = object;
215222

216-
// walk down the property path
217-
for (var i = 0; i < path.length; i++) {
218-
value = value[path[i]];
223+
// walk down the property path
224+
for (var i = 0; i < path.length; i++) {
225+
value = value[path[i]];
219226

220-
if (value == null) {
221-
return null;
222-
}
227+
if (value == null) {
228+
return null;
223229
}
224-
225-
return value;
226230
}
231+
232+
return value;
227233
}

0 commit comments

Comments
 (0)