|
| 1 | +/* |
| 2 | + Regression tests. Run this file and check the git diff of the output files to see what changed. |
| 3 | +*/ |
| 4 | + |
| 5 | +import { readFileSync, writeFileSync } from 'fs'; |
| 6 | +import { Config } from '../config.js'; |
| 7 | +import { EntityMatch } from '../interfaces/entity-match.js'; |
| 8 | +import { EntityResult } from '../interfaces/entity-result.js'; |
| 9 | +import { Meta } from '../interfaces/meta.js'; |
| 10 | +import { Query } from '../interfaces/query.js'; |
| 11 | +import { SearcherFactory } from '../searcher-factory.js'; |
| 12 | + |
/** Directory that receives the indexing meta file and one output file per query. */
const outputPath = './src/regression-test/output';
/** Width (in UTF-16 code units) of the narrow table columns: rank and quality. */
const shortColumnWidth = 8;
/** Width (in UTF-16 code units) of the wide table columns: entity and matched string. */
const wideColumnWidth = 40;
// Unicode bidirectional isolate controls, used to keep right-to-left cells from
// reordering the neighbouring left-to-right cells of a table row.
const RLI = '\u2067'; // U+2067 RIGHT-TO-LEFT ISOLATE
const LRI = '\u2066'; // U+2066 LEFT-TO-RIGHT ISOLATE
const PDI = '\u2069'; // U+2069 POP DIRECTIONAL ISOLATE

const text = readFileSync('./data/world-ctvs.txt', 'utf8');
// Accept both LF and CRLF line endings so a Windows checkout of the data file does
// not leave a trailing '\r' on every entity name. The first line is skipped
// (presumably a header row; confirm against the data file).
const lines = text.split(/\r?\n/).slice(1);
// Each remaining line becomes one entity whose id is its zero-based line position.
const entities = lines.map((l, index) => ({ id: index, name: l }));
| 23 | + |
/**
 * One entity of the regression data set, built from a single line of the
 * input data file.
 */
interface GeoEntity {
  /** Zero-based position of the entity's line among the loaded data lines. */
  id: number;
  /** The raw line text; it is split on ';' into individual terms when indexing. */
  name: string;
}
| 28 | + |
// Start from the library's default configuration, but install a character
// predicate that accepts every character (presumably so that the non-Latin
// queries below are not filtered during normalization; confirm in the normalizer).
const config = Config.createDefaultConfig();
config.normalizerConfig.allowCharacter = (_) => true;
const searcher = SearcherFactory.createSearcher<GeoEntity, number>(config);

console.log(`Indexing ${entities.length} entities...`);
// Accessors handed to the searcher: the entity id, and the ';'-separated terms of its name.
const getEntityId = (entity: GeoEntity) => entity.id;
const getEntityTerms = (entity: GeoEntity) => entity.name.split(';');
const indexingMeta: Meta = searcher.indexEntities(entities, getEntityId, getEntityTerms);
// Persist the indexing meta data so changes show up in the git diff of the output folder.
const indexingMetaPath = `${outputPath}/_indexing-meta.txt`;
writeFileSync(indexingMetaPath, metaToJson(indexingMeta), { encoding: 'utf8' });
| 40 | + |
console.log('Running queries...');

// Every regression query: the output file name (without extension), the query
// string, and whether its match table should be rendered right-to-left.
// The queries are executed in the order listed here.
const regressionQueries: { name: string; queryString: string; rtl?: boolean }[] = [
  { name: 'carcassonne-prefix', queryString: 'carcasso' },
  { name: 'carcassonne-infix', queryString: 'cassonn' },
  { name: 'carcassonne-suffix', queryString: 'sonne' },
  { name: 'munich-insertion', queryString: 'muniich' },
  { name: 'boston-deletion', queryString: 'bostn' },
  { name: 'boulder-creek-substitution', queryString: 'boulder creak' },
  { name: 'tübingen-transposition', queryString: 'tübignen' },
  { name: 'tokyo', queryString: '東京都' },
  { name: 'tokyo-prefix', queryString: '東京' },
  { name: 'tbilisi', queryString: 'თბილისი' },
  { name: 'tbilisi-deletion', queryString: 'თბიისი' },
  { name: 'kuwait-city', queryString: 'مدينة الكويت', rtl: true },
  { name: 'kuwait-city-prefix', queryString: 'مدينة الك', rtl: true },
];

for (const { name, queryString, rtl } of regressionQueries) {
  // An omitted rtl flag is passed as undefined, so runQuery's default (false) applies.
  runQuery(name, queryString, rtl);
}

console.log('Finished.');
| 58 | + |
/**
 * Runs one query against the indexed entities and writes the serialized query,
 * the result meta data and the formatted match table to
 * `<outputPath>/<queryName>.txt`. Also logs the number of matches to the console.
 *
 * @param queryName Name of the test case; used as the output file name.
 * @param queryString The text to search for.
 * @param rtl Whether the match table is formatted for right-to-left text.
 */
function runQuery(queryName: string, queryString: string, rtl: boolean = false) {
  const query: Query = new Query(queryString);
  const { matches, meta }: EntityResult<GeoEntity> = searcher.getMatches(query);
  console.log(`'${queryString}' (${queryName}): ${matches.length} matches.`);

  // The three sections of the output file, separated by blank lines.
  const querySection = JSON.stringify(query, null, 2);
  const metaSection = metaToJson(meta);
  const matchesSection = matchesToString(matches, rtl);

  const targetFile = `${outputPath}/${queryName}.txt`;
  writeFileSync(targetFile, `${querySection}\n\n${metaSection}\n\n${matchesSection}`);
}
| 69 | + |
/**
 * Serializes meta data as pretty-printed JSON (2-space indentation).
 *
 * @param meta The meta data; its `allEntries` are turned into a plain object
 *   via `Object.fromEntries` before serialization.
 * @returns The JSON text.
 */
function metaToJson(meta: Meta): string {
  const plainEntries = Object.fromEntries(meta.allEntries);
  return JSON.stringify(plainEntries, null, 2);
}
| 73 | + |
/**
 * Formats matches as a fixed-width text table: one header line, one blank line,
 * then one row per match with ranks starting at 1.
 *
 * @param matches The matches, in the order they should be ranked.
 * @param rtl Whether rows are formatted for right-to-left text.
 * @returns The table text.
 */
function matchesToString(matches: EntityMatch<GeoEntity>[], rtl: boolean): string {
  // Column titles paired with their widths, in display order.
  const headerCells: [string, number][] = [
    ['Rank', shortColumnWidth],
    ['Entity', wideColumnWidth],
    ['Matched String', wideColumnWidth],
    ['Quality', shortColumnWidth],
  ];
  const headerLine = headerCells.map(([title, width]) => padRight(title, width)).join('');
  const rows = matches.map((match, index) => matchToString(match, index + 1, rtl));
  return `${headerLine}\n\n${rows.join('\n')}`;
}
| 84 | + |
/**
 * Formats one match as a table row: rank, entity name, matched string and
 * quality (two decimals).
 *
 * For left-to-right output every cell is right-padded. For right-to-left
 * output every cell is wrapped in directional isolate marks; the two text
 * cells are treated as right-to-left and the numeric cells as left-to-right.
 *
 * @param match The match to format.
 * @param rank The 1-based rank shown in the first column.
 * @param rtl Whether the row is formatted for right-to-left text.
 * @returns The row text, without a trailing newline.
 */
function matchToString(match: EntityMatch<GeoEntity>, rank: number, rtl: boolean): string {
  // Cell content, column width, and whether the cell holds entity text (as opposed to a number).
  const cells: [string, number, boolean][] = [
    [rank.toString(), shortColumnWidth, false],
    [match.entity.name, wideColumnWidth, true],
    [match.matchedString, wideColumnWidth, true],
    [match.quality.toFixed(2), shortColumnWidth, false],
  ];
  return cells
    .map(([cell, width, isEntityText]) =>
      rtl ? padAndMark(cell, width, isEntityText) : padRight(cell, width)
    )
    .join('');
}
| 101 | + |
/**
 * Pads a cell to the target width and wraps it in a directional isolate.
 *
 * @param s The cell content.
 * @param targetWidth The minimum width of the padded content.
 * @param rtl If true, the content is left-padded and enclosed in RLI…PDI;
 *   otherwise it is right-padded and enclosed in LRI…PDI.
 * @returns The marked, padded cell.
 */
function padAndMark(s: string, targetWidth: number, rtl: boolean): string {
  if (rtl) {
    return `${RLI}${padLeft(s, targetWidth)}${PDI}`;
  }
  return `${LRI}${padRight(s, targetWidth)}${PDI}`;
}
| 107 | + |
/**
 * Appends spaces to a string until it is at least `targetWidth` UTF-16 code
 * units long. Strings that are already long enough are returned unchanged.
 *
 * @param s The string to pad.
 * @param targetWidth The minimum length of the result.
 * @returns The right-padded string.
 */
function padRight(s: string, targetWidth: number): string {
  return s.padEnd(targetWidth, ' ');
}
| 111 | + |
/**
 * Prepends spaces to a string until it is at least `targetWidth` UTF-16 code
 * units long. Strings that are already long enough are returned unchanged.
 *
 * @param s The string to pad.
 * @param targetWidth The minimum length of the result.
 * @returns The left-padded string.
 */
function padLeft(s: string, targetWidth: number): string {
  return s.padStart(targetWidth, ' ');
}
0 commit comments