diff --git a/package-lock.json b/package-lock.json index 6f4dd66..1eef5f7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "1.19.1", + "fastest-levenshtein": "1.0.16", "zod": "3.25.76" }, "bin": { @@ -6014,6 +6015,15 @@ "dev": true, "license": "MIT" }, + "node_modules/fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, "node_modules/fastq": { "version": "1.19.1", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", diff --git a/package.json b/package.json index aca326c..5f47a80 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "1.19.1", + "fastest-levenshtein": "1.0.16", "zod": "3.25.76" }, "devDependencies": { diff --git a/src/__tests__/__snapshots__/server.search.test.ts.snap b/src/__tests__/__snapshots__/server.search.test.ts.snap new file mode 100644 index 0000000..65a7e5f --- /dev/null +++ b/src/__tests__/__snapshots__/server.search.test.ts.snap @@ -0,0 +1,434 @@ +// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing + +exports[`findClosest should attempt to find a closest match, all empty string items 1`] = ` +{ + "match": null, + "query": "test", +} +`; + +exports[`findClosest should attempt to find a closest match, empty haystack 1`] = ` +{ + "match": null, + "query": "Button", +} +`; + +exports[`findClosest should attempt to find a closest match, empty needle 1`] = ` +{ + "match": null, + "query": "", +} +`; + +exports[`findClosest should attempt to find a closest match, exact match 1`] = ` +{ + "match": "Alert", + "query": "Alert", +} +`; + +exports[`findClosest should attempt to find a closest match, 
match spacing 1`] = ` +{ + "match": "dolor sit", + "query": "dolor sit", +} +`; + +exports[`findClosest should attempt to find a closest match, multiple matches 1`] = ` +{ + "match": "Badge", + "query": "badge", +} +`; + +exports[`findClosest should attempt to find a closest match, multiple matches with case insensitive search 1`] = ` +{ + "match": "Badge", + "query": "BADGE", +} +`; + +exports[`findClosest should attempt to find a closest match, non-existent needle 1`] = ` +{ + "match": "Alert", + "query": "lorem", +} +`; + +exports[`findClosest should attempt to find a closest match, non-existent needle with case insensitive search 1`] = ` +{ + "match": "Alert", + "query": "LOREM", +} +`; + +exports[`findClosest should attempt to find a closest match, null items 1`] = ` +{ + "match": null, + "query": "Button", +} +`; + +exports[`findClosest should attempt to find a closest match, partial query 1`] = ` +{ + "match": "Button", + "query": "but", +} +`; + +exports[`findClosest should attempt to find a closest match, typo 1`] = ` +{ + "match": "Button", + "query": "buton", +} +`; + +exports[`findClosest should attempt to find a closest match, undefined items 1`] = ` +{ + "match": null, + "query": "Button", +} +`; + +exports[`fuzzySearch should fuzzy match, contains match multiple 1`] = ` +[ + { + "distance": 1, + "item": "AlertGroup", + "matchType": "suffix", + }, + { + "distance": 1, + "item": "BadgeGroup", + "matchType": "suffix", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "suffix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, deduplicate by normalized value 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, duplicate items 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, empty items 1`] = `[]`; + +exports[`fuzzySearch should fuzzy match, empty query 1`] = `[]`; + 
+exports[`fuzzySearch should fuzzy match, empty query against maxDistance 1`] = ` +[ + { + "distance": 1, + "item": "A", + "matchType": "fuzzy", + }, + { + "distance": 2, + "item": "AB", + "matchType": "fuzzy", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, empty query extended distance 1`] = ` +[ + { + "distance": 4, + "item": "Card", + "matchType": "fuzzy", + }, + { + "distance": 5, + "item": "Alert", + "matchType": "fuzzy", + }, + { + "distance": 5, + "item": "Badge", + "matchType": "fuzzy", + }, + { + "distance": 6, + "item": "Button", + "matchType": "fuzzy", + }, + { + "distance": 10, + "item": "AlertGroup", + "matchType": "fuzzy", + }, + { + "distance": 10, + "item": "BadgeGroup", + "matchType": "fuzzy", + }, + { + "distance": 10, + "item": "CardHeader", + "matchType": "fuzzy", + }, + { + "distance": 11, + "item": "ButtonGroup", + "matchType": "fuzzy", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, exact match 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, exact match case-insensitive 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, fuzzy match within distance 1`] = ` +[ + { + "distance": 5, + "item": "Badge", + "matchType": "fuzzy", + }, + { + "distance": 6, + "item": "Alert", + "matchType": "fuzzy", + }, + { + "distance": 6, + "item": "Card", + "matchType": "fuzzy", + }, + { + "distance": 8, + "item": "AlertGroup", + "matchType": "fuzzy", + }, + { + "distance": 8, + "item": "BadgeGroup", + "matchType": "fuzzy", + }, + { + "distance": 10, + "item": "CardHeader", + "matchType": "fuzzy", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, length-delta precheck for maxDistance 1`] = `[]`; + +exports[`fuzzySearch should fuzzy 
match, match within max results 1`] = ` +[ + { + "distance": 1, + "item": "Alert", + "matchType": "prefix", + }, + { + "distance": 1, + "item": "AlertGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, match within restricted distance 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, matches are alphabetized 1`] = ` +[ + { + "distance": 1, + "item": "Button", + "matchType": "prefix", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, matches are normalized 1`] = ` +[ + { + "distance": 0, + "item": "resume", + "matchType": "exact", + }, + { + "distance": 0, + "item": "RESUME", + "matchType": "exact", + }, + { + "distance": 0, + "item": "Résumé", + "matchType": "exact", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, mixed types by maxDistance 1`] = ` +[ + { + "distance": 1, + "item": "Button", + "matchType": "prefix", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, multiple words 1`] = ` +[ + { + "distance": 1, + "item": "BadgeGroup", + "matchType": "fuzzy", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, multiple words maxDistance 1`] = ` +[ + { + "distance": 2, + "item": "Lorem Ipsum Dolor Sit", + "matchType": "contains", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, negative maxDistance 1`] = `[]`; + +exports[`fuzzySearch should fuzzy match, null items 1`] = `[]`; + +exports[`fuzzySearch should fuzzy match, prefix match 1`] = ` +[ + { + "distance": 1, + "item": "Button", + "matchType": "prefix", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, prefix match multiple 1`] = ` +[ + { + "distance": 1, + 
"item": "Button", + "matchType": "prefix", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, single item 1`] = ` +[ + { + "distance": 0, + "item": "BUTTON", + "matchType": "exact", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, suffix match 1`] = ` +[ + { + "distance": 1, + "item": "CardHeader", + "matchType": "suffix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, trimmed query 1`] = ` +[ + { + "distance": 0, + "item": "Button", + "matchType": "exact", + }, + { + "distance": 1, + "item": "ButtonGroup", + "matchType": "prefix", + }, +] +`; + +exports[`fuzzySearch should fuzzy match, undefined items 1`] = `[]`; diff --git a/src/__tests__/server.search.test.ts b/src/__tests__/server.search.test.ts new file mode 100644 index 0000000..dbe9b4f --- /dev/null +++ b/src/__tests__/server.search.test.ts @@ -0,0 +1,325 @@ +import { normalizeString, fuzzySearch, findClosest } from '../server.search'; + +describe('normalizeString', () => { + it('should normalize a string', () => { + expect(normalizeString('résumé')).toBe(normalizeString('resume')); + }); + + it('should have memo property', () => { + expect(normalizeString.memo).toBeDefined(); + }); +}); + +describe('findClosest', () => { + const components = ['Button', 'ButtonGroup', 'Badge', 'BadgeGroup', 'Alert', 'AlertGroup']; + + it.each([ + { + description: 'undefined items', + query: 'Button', + items: undefined + }, + { + description: 'null items', + query: 'Button', + items: null + }, + { + description: 'empty haystack', + query: 'Button', + items: [] + }, + { + description: 'empty needle', + query: '', + items: components + }, + { + description: 'non-existent needle', + query: 'lorem', + items: components + }, + { + description: 'non-existent needle with case insensitive search', + query: 'LOREM', + items: components + }, + { + description: 'exact match', + query: 'Alert', + items: components + }, + { + description: 
'partial query', + query: 'but', + items: components + }, + { + description: 'typo', + query: 'buton', + items: components + }, + { + description: 'multiple matches', + query: 'badge', + items: components + }, + { + description: 'multiple matches with case insensitive search', + query: 'BADGE', + items: components + }, + { + description: 'match spacing', + query: 'dolor sit', + items: ['sit', 'dolor', 'dolor sit'] + }, + { + description: 'all empty string items', + query: 'test', + items: ['', '', ''] + } + ])('should attempt to find a closest match, $description', ({ query, items }) => { + expect({ + query, + match: findClosest(query, items as string[]) + }).toMatchSnapshot(); + }); + + it('should handle normalizeFn errors in findClosest', () => { + const throwingNormalizeFn = () => { + throw new Error('Normalization failed'); + }; + + expect(() => { + findClosest('button', ['Button', 'Badge'], { normalizeFn: throwingNormalizeFn }); + }).toThrow('Normalization failed'); + }); +}); + +describe('fuzzySearch', () => { + const components = ['Button', 'ButtonGroup', 'Badge', 'BadgeGroup', 'Alert', 'AlertGroup', 'Card', 'CardHeader']; + + it.each([ + { + description: 'undefined items', + query: 'Button', + items: undefined, + options: undefined + }, + { + description: 'null items', + query: 'Button', + items: null, + options: undefined + }, + { + description: 'exact match', + query: 'Button', + items: components, + options: undefined + }, + { + description: 'exact match case-insensitive', + query: 'button', + items: components, + options: undefined + }, + { + description: 'prefix match', + query: 'but', + items: components, + options: undefined + }, + { + description: 'prefix match multiple', + query: 'butt', + items: components, + options: { + maxDistance: 10 + } + }, + { + description: 'contains match multiple', + query: 'roup', + items: components, + options: { + maxDistance: 10 + } + }, + { + description: 'fuzzy match within distance', + query: 'button', + items: 
components, + options: { + maxDistance: 10, + isExactMatch: false, + isPrefixMatch: false, + isSuffixMatch: false, + isContainsMatch: false, + isFuzzyMatch: true + } + }, + { + description: 'match within max results', + query: 'a', + items: components, + options: { + maxDistance: 10, + maxResults: 2, + isFuzzyMatch: true + } + }, + { + description: 'match within restricted distance', + query: 'button', + items: components, + options: { + maxDistance: 1 + } + }, + { + description: 'empty query', + query: '', + items: components, + options: { + isFuzzyMatch: true + } + }, + { + description: 'empty query extended distance', + query: '', + items: components, + options: { + maxDistance: 20, + isFuzzyMatch: true + } + }, + { + description: 'trimmed query', + query: ' button ', + items: components, + options: undefined + }, + { + description: 'empty items', + query: 'button', + items: [], + options: undefined + }, + { + description: 'single item', + query: 'button', + items: ['BUTTON'], + options: undefined + }, + { + description: 'multiple words maxDistance', + query: 'ipsum dolor', + items: ['Lorem Ipsum Dolor Sit'], + options: { + maxDistance: 10 + } + }, + { + description: 'multiple words', + query: 'badge group', + items: ['BadgeGroup'], + options: { + isFuzzyMatch: true, + maxDistance: 2 + } + }, + { + description: 'negative maxDistance', + query: 'button', + items: ['Button'], + options: { + maxDistance: -1 + } + }, + { + description: 'empty query against maxDistance', + query: '', + items: ['A', 'AB', 'ABCDE', 'ABCDEFG'], + options: { + maxDistance: 3, + isFuzzyMatch: true + } + }, + { + description: 'length-delta precheck for maxDistance', + query: 'AB', + items: ['ABCDEFGH'], + options: { + maxDistance: 2, + isExactMatch: false, + isPrefixMatch: false, + isSuffixMatch: false, + isFuzzyMatch: true + } + }, + { + description: 'duplicate items', + query: 'button', + items: ['Button', 'Button', 'Button'], + options: { + maxDistance: 10 + } + }, + { + description: 
'suffix match', + query: 'header', + items: ['Card', 'CardHeader'], + options: { + isExactMatch: false, + isPrefixMatch: false, + isContainsMatch: false, + isFuzzyMatch: false + } + }, + { + description: 'mixed types by maxDistance', + query: 'butto', + items: ['Button', 'ButtonGroup', 'Burrito'], + options: { + maxDistance: 1, + isFuzzyMatch: true + } + }, + { + description: 'matches are alphabetized', + query: 'butt', + items: ['ButtonGroup', 'Button'], + options: { + maxDistance: 10 + } + }, + { + description: 'matches are normalized', + query: 'resume', + items: ['Résumé', 'resume', 'RESUME'], + options: undefined + }, + { + description: 'deduplicate by normalized value', + query: 'button', + items: ['Button', 'button', 'BUTTON'], + options: { + deduplicateByNormalized: true + } + } + ])('should fuzzy match, $description', ({ query, items, options }) => { + expect(fuzzySearch(query, items as string[], options)).toMatchSnapshot(); + }); + + it('should handle normalizeFn errors in fuzzySearch', () => { + const throwingNormalizeFn = () => { + throw new Error('Normalization failed'); + }; + + expect(() => { + fuzzySearch('button', ['Button', 'Badge'], { normalizeFn: throwingNormalizeFn }); + }).toThrow('Normalization failed'); + }); +}); diff --git a/src/server.helpers.ts b/src/server.helpers.ts index 761eaf5..b7d18f3 100644 --- a/src/server.helpers.ts +++ b/src/server.helpers.ts @@ -19,4 +19,7 @@ const generateHash = (content: unknown) => */ const isPromise = (obj: unknown) => /^\[object (Promise|Async|AsyncFunction)]/.test(Object.prototype.toString.call(obj)); -export { generateHash, isPromise }; +export { + generateHash, + isPromise +}; diff --git a/src/server.search.ts b/src/server.search.ts new file mode 100644 index 0000000..ad07901 --- /dev/null +++ b/src/server.search.ts @@ -0,0 +1,224 @@ +import { distance, closest } from 'fastest-levenshtein'; +import { memo } from './server.caching'; + +/** + * normalizeString function interface + */ +interface 
NormalizeString { + (str: string): string; + memo: (str: string) => string; +} + +/** + * Options for closest search + */ +interface ClosestSearchOptions { + normalizeFn?: (str: string) => string; +} + +/** + * Fuzzy search result using fastest-levenshtein + */ +interface FuzzySearchResult { + item: string; + distance: number; + matchType: 'exact' | 'prefix' | 'suffix' | 'contains' | 'fuzzy'; +} + +/** + * Options for fuzzy search + * + * - `maxDistance` - Maximum edit distance for a match. Distance is defined as + * - exact = 0 + * - prefix = 1 + * - suffix = 1 + * - contains = 2 + * - fuzzy = Levenshtein edit distance + * - `maxResults` - Maximum number of results to return + * - `normalizeFn` - Function to normalize strings (default: `normalizeString`) + * - `isExactMatch` | `isPrefixMatch` | `isSuffixMatch` | `isContainsMatch` | `isFuzzyMatch` - Enable specific match modes + * - `deduplicateByNormalized` - If true, deduplicate results by normalized value instead of original string (default: false) + */ +interface FuzzySearchOptions { + maxDistance?: number; + maxResults?: number; + normalizeFn?: (str: string) => string; + isExactMatch?: boolean; + isPrefixMatch?: boolean; + isSuffixMatch?: boolean; + isContainsMatch?: boolean; + isFuzzyMatch?: boolean; + deduplicateByNormalized?: boolean; +} + +/** + * Internal lightweight normalization: trim, lowercase, remove diacritics (a sign/accent character), squash separators + * + * - Functions `findClosest` and `fuzzySearch` use this internally. + * - Can be overridden in the `findClosest` and `fuzzySearch` related options for custom normalization. + * - Function has a `memo` property to allow use as a memoized function. 
/**
 * Normalize a string for case-, accent- and separator-insensitive comparison.
 *
 * Steps, in order: coerce falsy input to `''`, lowercase, decompose with NFKD and strip
 * combining marks (U+0300-U+036F), collapse every run of whitespace, `_` or `-` into a
 * single space, then trim.
 *
 * - Trimming runs last so that leading/trailing separators (`_button_`, `-button`) do not
 *   leave stray edge spaces behind.
 * - `findClosest` and `fuzzySearch` default to the memoized variant, `normalizeString.memo`,
 *   and accept a `normalizeFn` option to override it.
 *
 * @param str - String to normalize
 * @returns The normalized string
 */
const normalizeString: NormalizeString = (str: string) => String(str || '')
  .toLowerCase()
  .normalize('NFKD')
  .replace(/[\u0300-\u036f]/g, '')
  .replace(/[\s_-]+/g, ' ')
  .trim();

/**
 * Memoized version of normalizeString
 */
normalizeString.memo = memo(normalizeString, { cacheLimit: 25 });

/**
 * Find the closest match using fastest-levenshtein's `closest` function.
 *
 * - Returns `null` when the normalized query is empty, when `items` is not a non-empty array,
 *   or when no item normalizes to a non-empty string.
 * - Falsy items (null/undefined/empty) are mapped to `''` without calling `normalizeFn`, and
 *   empty candidates are skipped so a placeholder can never win over a real item.
 * - Returns the **first** original item whose normalized value equals the best normalized candidate.
 * - For multiple matches, use `fuzzySearch` instead.
 *
 * @param query - Search query string
 * @param items - Array of strings to search
 * @param options - Search configuration options
 * @param options.normalizeFn - Function used to normalize the query and items (default: `normalizeString.memo`)
 * @returns Closest matching original string or `null`
 *
 * @example
 * ```typescript
 * const result = findClosest('button', ['Button', 'ButtonGroup', 'Badge']);
 * // Returns: 'Button' (the closest match)
 * ```
 */
const findClosest = (
  query: string,
  items: string[] = [],
  {
    normalizeFn = normalizeString.memo
  }: ClosestSearchOptions = {}
): string | null => {
  const normalizedQuery = normalizeFn(query);

  if (!normalizedQuery || !Array.isArray(items) || items.length === 0) {
    return null;
  }

  // Keep this array index-aligned with `items` so the winner can be mapped back to its original.
  const normalizedItems = items.map(item => (item ? normalizeFn(item) : ''));

  // Empty candidates only exist to keep indexes aligned; they must not compete for "closest".
  const candidates = normalizedItems.filter(Boolean);

  if (candidates.length === 0) {
    return null;
  }

  const closestMatch = closest(normalizedQuery, candidates);

  return items[normalizedItems.indexOf(closestMatch)] || null;
};

/**
 * Fuzzy search using fastest-levenshtein
 *
 * - Each item gets exactly one classification, tested in this order: exact, prefix, suffix,
 *   contains (constant distances 0/1/1/2), then fuzzy.
 * - Fuzzy distance is computed only when the earlier classifications fail, `isFuzzyMatch` is true,
 *   and the string length delta is within `maxDistance` (cheap lower-bound check).
 * - Classification happens before the enable flags are applied: an item classified as a
 *   disabled type is dropped, it does not fall through to a later type.
 * - Global filter: a result is included only if its type is enabled AND distance <= maxDistance.
 * - Negative `maxDistance` values filter out all results, including exact matches.
 * - An empty query can only produce exact matches (items that normalize to `''`) or, when
 *   `isFuzzyMatch` is true, fuzzy matches for items with length <= maxDistance.
 * - A non-array `items` value yields `[]`; non-string entries are skipped.
 * - Results are sorted by distance (lowest first), then by `localeCompare` on the original item,
 *   and truncated to `maxResults` (values below 0 are treated as 0).
 *
 * @param query - Search query string
 * @param items - Array of strings to search
 * @param options - Search configuration options, see `FuzzySearchOptions`
 * @returns Array of matching original strings with distance and match type
 *
 * @example
 * ```typescript
 * const results = fuzzySearch('button', ['Button', 'ButtonGroup', 'Badge'], {
 *   maxDistance: 3,
 *   maxResults: 5
 * });
 * // Returns: [{ item: 'Button', distance: 0, matchType: 'exact' }, ...]
 * ```
 */
const fuzzySearch = (
  query: string,
  items: string[] = [],
  {
    maxDistance = 3,
    maxResults = 10,
    normalizeFn = normalizeString.memo,
    isExactMatch = true,
    isPrefixMatch = true,
    isSuffixMatch = true,
    isContainsMatch = true,
    isFuzzyMatch = false,
    deduplicateByNormalized = false
  }: FuzzySearchOptions = {}
): FuzzySearchResult[] => {
  // Normalize the query first so a failing `normalizeFn` surfaces even when there are no items.
  const normalizedQuery = normalizeFn(query);

  if (!Array.isArray(items)) {
    return [];
  }

  const isTypeEnabled: Record<FuzzySearchResult['matchType'], boolean> = {
    exact: isExactMatch,
    prefix: isPrefixMatch,
    suffix: isSuffixMatch,
    contains: isContainsMatch,
    fuzzy: isFuzzyMatch
  };
  const hasQuery = normalizedQuery !== '';
  const seenKeys = new Set<string>();
  const results: FuzzySearchResult[] = [];

  for (const item of items) {
    // Untyped callers may pass null/undefined entries; they can never match and would break sorting.
    if (typeof item !== 'string') {
      continue;
    }

    const normalizedItem = normalizeFn(item);
    const deduplicationKey = deduplicateByNormalized ? normalizedItem : item;

    if (seenKeys.has(deduplicationKey)) {
      continue;
    }

    seenKeys.add(deduplicationKey);

    let matchType: FuzzySearchResult['matchType'] | undefined;
    let editDistance = 0;

    if (normalizedItem === normalizedQuery) {
      matchType = 'exact';
    } else if (hasQuery && normalizedItem.startsWith(normalizedQuery)) {
      matchType = 'prefix';
      editDistance = 1;
    } else if (hasQuery && normalizedItem.endsWith(normalizedQuery)) {
      matchType = 'suffix';
      editDistance = 1;
    } else if (hasQuery && normalizedItem.includes(normalizedQuery)) {
      matchType = 'contains';
      editDistance = 2;
    } else if (isFuzzyMatch && Math.abs(normalizedItem.length - normalizedQuery.length) <= maxDistance) {
      // The length delta is a lower bound on the edit distance, so anything beyond it cannot qualify.
      matchType = 'fuzzy';
      editDistance = distance(normalizedQuery, normalizedItem);
    }

    if (matchType !== undefined && isTypeEnabled[matchType] && editDistance <= maxDistance) {
      results.push({
        item,
        distance: editDistance,
        matchType
      });
    }
  }

  // Sort by distance (lowest first), then alphabetically
  results.sort((a, b) => (a.distance - b.distance) || a.item.localeCompare(b.item));

  // A negative end index would make `slice` count from the end; clamp so the limit stays a limit.
  return results.slice(0, Math.max(0, maxResults));
};

export {
  normalizeString,
  fuzzySearch,
  findClosest,
  type NormalizeString,
  type ClosestSearchOptions,
  type FuzzySearchResult,
  type FuzzySearchOptions
};