Skip to content

Commit 8d2cda6

Browse files
authored
feat: regression test (#10)
1 parent da16d5e commit 8d2cda6

19 files changed

+423
-2
lines changed

.vscode/launch.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,18 @@
44
{
55
"type": "node",
66
"request": "launch",
7-
"name": "Launch Program",
7+
"name": "Launch Basic Usage",
88
"skipFiles": ["<node_internals>/**"],
99
"program": "${workspaceFolder}/dist/basic-usage.js",
1010
"outFiles": ["${workspaceFolder}/**/*.js"]
11+
},
12+
{
13+
"type": "node",
14+
"request": "launch",
15+
"name": "Run Regression Test",
16+
"skipFiles": ["<node_internals>/**"],
17+
"program": "${workspaceFolder}/dist/regression-test/main.js",
18+
"outFiles": ["${workspaceFolder}/**/*.js"]
1119
}
1220
]
1321
}

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@
99
"liveServer.settings.port": 5501,
1010
"[json]": {
1111
"editor.defaultFormatter": "esbenp.prettier-vscode"
12+
},
13+
"[plaintext]": {
14+
"editor.renderControlCharacters": false
1215
}
1316
}

package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,4 @@
7474
"dist",
7575
"usage-examples"
7676
]
77-
}
77+
}

src/regression-test/main.ts

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
Regression tests. Run this file and check the git diff of the output files to see what changed.
3+
*/
4+
5+
import { readFileSync, writeFileSync } from 'fs';
6+
import { Config } from '../config.js';
7+
import { EntityMatch } from '../interfaces/entity-match.js';
8+
import { EntityResult } from '../interfaces/entity-result.js';
9+
import { Meta } from '../interfaces/meta.js';
10+
import { Query } from '../interfaces/query.js';
11+
import { SearcherFactory } from '../searcher-factory.js';
12+
13+
/** Directory that receives the indexing meta data file and one output file per query. */
const outputPath = './src/regression-test/output';
/** Width (in UTF-16 code units) of the narrow table columns: rank and quality. */
const shortColumnWidth = 8;
/** Width (in UTF-16 code units) of the wide table columns: entity and matched string. */
const wideColumnWidth = 40;
// Unicode bidirectional isolate controls used to keep right-to-left cells from
// reordering their neighbours in the output tables.
const RLI = '\u2067'; // RIGHT-TO-LEFT ISOLATE
const LRI = '\u2066'; // LEFT-TO-RIGHT ISOLATE
const PDI = '\u2069'; // POP DIRECTIONAL ISOLATE

/** One line of the data file, identified by its position. */
interface GeoEntity {
  id: number;
  name: string;
}

const text = readFileSync('./data/world-ctvs.txt', 'utf8');
// Accept both LF and CRLF line endings so that a checkout with converted line
// endings does not leave a trailing '\r' on every entity name.
// The first line is skipped (presumably a header row; confirm against the data file).
// NOTE(review): a trailing newline in the data file still yields one entity with
// an empty name; left unchanged to keep the recorded baselines stable.
const lines = text.split(/\r?\n/).slice(1);
const entities: GeoEntity[] = lines.map((l, index) => ({ id: index, name: l }));

const config = Config.createDefaultConfig();
// Let every character through the allow-filter of the normalizer configuration.
config.normalizerConfig.allowCharacter = (_) => true;
const searcher = SearcherFactory.createSearcher<GeoEntity, number>(config);

console.log(`Indexing ${entities.length} entities...`);
// Each entity is keyed by its id; its name is split on ';' into the terms to index.
const indexingMeta: Meta = searcher.indexEntities(
  entities,
  (e) => e.id,
  (e) => e.name.split(';')
);
writeFileSync(`${outputPath}/_indexing-meta.txt`, metaToJson(indexingMeta), { encoding: 'utf8' });

console.log('Running queries...');

runQuery('carcassonne-prefix', 'carcasso');
runQuery('carcassonne-infix', 'cassonn');
runQuery('carcassonne-suffix', 'sonne');
runQuery('munich-insertion', 'muniich');
runQuery('boston-deletion', 'bostn');
runQuery('boulder-creek-substitution', 'boulder creak');
runQuery('tübingen-transposition', 'tübignen');
runQuery('tokyo', '東京都');
runQuery('tokyo-prefix', '東京');
runQuery('tbilisi', 'თბილისი');
runQuery('tbilisi-deletion', 'თბიისი');
// The Arabic queries request the right-to-left table layout.
runQuery('kuwait-city', 'مدينة الكويت', true);
runQuery('kuwait-city-prefix', 'مدينة الك', true);

console.log('Finished.');
58+
59+
/**
 * Runs one query against the module-level searcher, logs the number of matches
 * and writes the query, its meta data and the match table to
 * `${outputPath}/${queryName}.txt`.
 *
 * @param queryName Name of the test case; also used as the output file name.
 * @param queryString The string to search for.
 * @param rtl Whether the match table should use the right-to-left layout.
 */
function runQuery(queryName: string, queryString: string, rtl: boolean = false) {
  const query: Query = new Query(queryString);
  const searchResult: EntityResult<GeoEntity> = searcher.getMatches(query);
  console.log(`'${queryString}' (${queryName}): ${searchResult.matches.length} matches.`);

  // The three sections of the output file, separated by one blank line each.
  const sections = [
    JSON.stringify(query, null, 2),
    metaToJson(searchResult.meta),
    matchesToString(searchResult.matches, rtl),
  ];
  writeFileSync(`${outputPath}/${queryName}.txt`, sections.join('\n\n'));
}
69+
70+
/**
 * Serializes meta data as pretty-printed JSON (two-space indentation).
 *
 * @param meta The meta data whose `allEntries` are turned into a plain object.
 * @returns The JSON text.
 */
function metaToJson(meta: Meta): string {
  const entriesAsObject = Object.fromEntries(meta.allEntries);
  return JSON.stringify(entriesAsObject, null, 2);
}
73+
74+
/**
 * Renders the matches as a fixed-width text table: a header line, a blank line,
 * then one row per match with ranks starting at 1.
 *
 * @param matches The matches in ranking order.
 * @param rtl Whether the rows should use the right-to-left layout.
 * @returns The table as a single string.
 */
function matchesToString(matches: EntityMatch<GeoEntity>[], rtl: boolean): string {
  const headerCells = [
    padRight('Rank', shortColumnWidth),
    padRight('Entity', wideColumnWidth),
    padRight('Matched String', wideColumnWidth),
    padRight('Quality', shortColumnWidth),
  ];
  const toRow = (match: EntityMatch<GeoEntity>, index: number): string =>
    matchToString(match, index + 1, rtl);
  const rows = matches.map(toRow);
  return headerCells.join('') + '\n\n' + rows.join('\n');
}
84+
85+
/**
 * Renders one match as a table row: rank, entity name, matched string and the
 * quality with two decimals.
 *
 * @param match The match to render.
 * @param rank The 1-based rank shown in the first column.
 * @param rtl When true, every cell is wrapped in bidi isolates and the two text
 *            cells are left-padded and marked right-to-left.
 * @returns The row without a trailing line break.
 */
function matchToString(match: EntityMatch<GeoEntity>, rank: number, rtl: boolean): string {
  const rankText = rank.toString();
  const qualityText = match.quality.toFixed(2);

  if (rtl) {
    // Numeric cells stay left-to-right; only the two text cells are right-to-left.
    return [
      padAndMark(rankText, shortColumnWidth, false),
      padAndMark(match.entity.name, wideColumnWidth, true),
      padAndMark(match.matchedString, wideColumnWidth, true),
      padAndMark(qualityText, shortColumnWidth, false),
    ].join('');
  }

  return [
    padRight(rankText, shortColumnWidth),
    padRight(match.entity.name, wideColumnWidth),
    padRight(match.matchedString, wideColumnWidth),
    padRight(qualityText, shortColumnWidth),
  ].join('');
}
101+
102+
/**
 * Pads a cell to the target width and wraps it in a Unicode bidi isolate.
 *
 * @param s The cell text.
 * @param targetWidth The width to pad to.
 * @param rtl True: pad on the left and open with RLI; false: pad on the right
 *            and open with LRI. The isolate is always closed with PDI.
 * @returns The padded, isolated cell.
 */
function padAndMark(s: string, targetWidth: number, rtl: boolean): string {
  if (rtl) {
    return RLI + padLeft(s, targetWidth) + PDI;
  }
  return LRI + padRight(s, targetWidth) + PDI;
}
107+
108+
/**
 * Appends spaces to a string until it is `targetWidth` UTF-16 code units long.
 * Strings that are already at least that long are returned unchanged.
 *
 * @param s The string to pad.
 * @param targetWidth The desired minimum length.
 * @returns The right-padded string.
 */
function padRight(s: string, targetWidth: number): string {
  // padEnd pads with U+0020 by default and never truncates.
  return s.padEnd(targetWidth);
}
111+
112+
/**
 * Prepends spaces to a string until it is `targetWidth` UTF-16 code units long.
 * Strings that are already at least that long are returned unchanged.
 *
 * @param s The string to pad.
 * @param targetWidth The desired minimum length.
 * @returns The left-padded string.
 */
function padLeft(s: string, targetWidth: number): string {
  // padStart pads with U+0020 by default and never truncates.
  return s.padStart(targetWidth);
}

src/regression-test/output/_indexing-meta.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"numberOfInvalidTerms": 1,
3+
"numberOfDistinctTerms": 1190185,
4+
"normalizationDuration": 1342,
5+
"numberOfSurrogateCharacters": 46,
6+
"numberOfEntities": 1237154,
7+
"numberOfTerms": 1237486,
8+
"indexingDuration": 6741
9+
}

src/regression-test/output/boston-deletion.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"string": "bostn",
3+
"topN": 10,
4+
"minQuality": 0.3
5+
}
6+
7+
{
8+
"queryDuration": 2
9+
}
10+
11+
Rank Entity Matched String Quality
12+
13+
1 Bosti Bosti 0.67
14+
2 Bosto Bosto 0.67
15+
3 Bosta Bosta 0.67
16+
4 Bost Bost 0.63
17+
5 Boston Boston 0.54
18+
6 Boston Boston 0.54
19+
7 Boston Boston 0.54
20+
8 Boston Boston 0.54
21+
9 Bostel Bostel 0.54
22+
10 Bostan Bostan 0.54

src/regression-test/output/boulder-creek-substitution.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"string": "boulder creak",
3+
"topN": 10,
4+
"minQuality": 0.3
5+
}
6+
7+
{
8+
"queryDuration": 2
9+
}
10+
11+
Rank Entity Matched String Quality
12+
13+
1 Boulder Creek Boulder Creek 0.75
14+
2 Boulder City Boulder City 0.61
15+
3 Boulder Boulder 0.54
16+
4 Bouldercombe Bouldercombe 0.54
17+
5 South Boulder South Boulder 0.54
18+
6 Boulder Boulder 0.54
19+
7 Boulder Hill Boulder Hill 0.54
20+
8 The Boulders The Boulders 0.47
21+
9 Boulders Boulders 0.47
22+
10 Boulder Junction Boulder Junction 0.45

src/regression-test/output/carcassonne-infix.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"string": "cassonn",
3+
"topN": 10,
4+
"minQuality": 0.3
5+
}
6+
7+
{
8+
"queryDuration": 4
9+
}
10+
11+
Rank Entity Matched String Quality
12+
13+
1 Cassone Cassone 0.75
14+
2 Casson Casson 0.71
15+
3 Cassou Cassou 0.59
16+
4 Cassola Cassola 0.59
17+
5 Cassop Cassop 0.59
18+
6 Canossa Canossa 0.59
19+
7 Cassoneca Cassoneca 0.57
20+
8 Cassongue Cassongue 0.57
21+
9 Carcassonne Carcassonne 0.55
22+
10 Cassoday Cassoday 0.53

src/regression-test/output/carcassonne-prefix.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"string": "carcasso",
3+
"topN": 10,
4+
"minQuality": 0.3
5+
}
6+
7+
{
8+
"queryDuration": 9
9+
}
10+
11+
Rank Entity Matched String Quality
12+
13+
1 Carcasse Carcasse 0.74
14+
2 Carcassonne Carcassonne 0.63
15+
3 Carasso Carasso 0.63
16+
4 Carcas Carcas 0.63
17+
5 Carcasí Carcasí 0.63
18+
6 Carcasi Carcasi 0.63
19+
7 Carcaboso Carcaboso 0.57
20+
8 Casaracra Casaracra 0.57
21+
9 Cassou Cassou 0.53
22+
10 Caracase Caracase 0.53

0 commit comments

Comments
 (0)