Skip to content

Commit 330a1dc

Browse files
Add camelCase splitting and punctuation normalization to search
1 parent 43a763d commit 330a1dc

File tree

2 files changed

+54
-19
lines changed

2 files changed

+54
-19
lines changed

frontend/src/pages/cohorts.tsx

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,24 @@ const HighlightedText = ({text, searchTerms, searchMode}: {text: string, searchT
2020
return <span dangerouslySetInnerHTML={{__html: highlightedHtml}} />;
2121
};
2222

23+
// Normalize text for fuzzy matching: split camelCase and replace common separators with spaces.
// The camelCase patterns depend on the original letter casing, so callers that want
// case-insensitive matching must lowercase the RESULT of this function, not its input.
const normalizeText = (text: string): string =>
  text
    .replace(/([a-z])([A-Z])/g, '$1 $2') // camelCase: aB → a B
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') // acronym boundary: ABc → A Bc
    .replace(/[_\-.,]/g, ' ');

/**
 * Helper function to check if text matches search terms (reusable).
 *
 * Matching is a case-insensitive substring test performed on two views of the text:
 *  - "flat": lowercased, separators (`_ - . ,`) replaced by spaces;
 *  - "split": camelCase/acronym boundaries split first, then lowercased.
 * A term matches when it is contained in either view (the term is transformed the same
 * way as the view it is compared against).
 *
 * @param text - Text to search in; null, undefined and empty text never match
 * @param searchTerms - Terms to look for; an empty list never matches
 * @param searchMode - 'exact' joins the terms with spaces and matches the phrase,
 *                     'and' requires every term, 'or' requires at least one
 * @returns true when the text satisfies the terms under the given mode
 */
const matchesSearchTerms = (text: string | null | undefined, searchTerms: string[], searchMode: 'or' | 'and' | 'exact'): boolean => {
  if (!text || searchTerms.length === 0) return false;

  const raw = String(text);
  // Lowercasing first makes the camelCase patterns no-ops, so this view only folds
  // separators. It is kept so that plain substring hits ("bodymass" in "bodyMassIndex")
  // continue to match.
  const textFlat = normalizeText(raw.toLowerCase());
  // Normalizing BEFORE lowercasing is what actually splits camelCase
  // ("bodyMassIndex" → "body mass index").
  const textSplit = normalizeText(raw).toLowerCase();

  const contains = (term: string): boolean =>
    textFlat.includes(normalizeText(term.toLowerCase())) ||
    textSplit.includes(normalizeText(term).toLowerCase());

  if (searchMode === 'exact') {
    return contains(searchTerms.join(' '));
  }
  if (searchMode === 'and') {
    return searchTerms.every(term => contains(term));
  }
  // 'or' mode
  return searchTerms.some(term => contains(term));
};
3643

@@ -119,9 +126,21 @@ const SearchResultsDisplay = React.memo(({cohortsData, searchTerms, searchMode,
119126

120127
// Format cohort metadata results: "CohortA (study objective, morbidity), CohortB (institution)"
121128
const formatCohortResults = () => {
122-
return results.matchedCohorts
123-
.map(({cohortId, sections}) => `${cohortId} (${sections.join(', ')})`)
124-
.join(', ');
129+
return (
130+
<span>
131+
{results.matchedCohorts.map(({cohortId, sections}, idx) => (
132+
<span key={cohortId}>
133+
{cohortId} ({sections.map((s, i) => (
134+
<span key={i}>
135+
<em>{s}</em>
136+
{i < sections.length - 1 && ', '}
137+
</span>
138+
))})
139+
{idx < results.matchedCohorts.length - 1 && ', '}
140+
</span>
141+
))}
142+
</span>
143+
);
125144
};
126145

127146
// Format variable results: "var1, var2 (CohortA); var3 (CohortB)"
@@ -138,7 +157,7 @@ const SearchResultsDisplay = React.memo(({cohortsData, searchTerms, searchMode,
138157
Search matched <strong className="text-primary">{results.matchedCohorts.length}</strong> cohort{results.matchedCohorts.length !== 1 ? 's' : ''} metadata
139158
</span>
140159
{results.matchedCohorts.length > 0 && (
141-
<div className="mt-1 text-xs text-gray-600 dark:text-gray-400">
160+
<div className="mt-1 text-gray-600 dark:text-gray-400">
142161
<strong>Studies metadata:</strong> {formatCohortResults()}
143162
</div>
144163
)}
@@ -153,7 +172,7 @@ const SearchResultsDisplay = React.memo(({cohortsData, searchTerms, searchMode,
153172
Search matched <strong className="text-primary">{results.totalVariables}</strong> variable description{results.totalVariables !== 1 ? 's' : ''} in <strong className="text-primary">{cohortsWithVarMatches}</strong> cohort{cohortsWithVarMatches !== 1 ? 's' : ''}
154173
</span>
155174
{results.totalVariables > 0 && (
156-
<div className="mt-1 text-xs text-gray-600 dark:text-gray-400 max-h-20 overflow-y-auto">
175+
<div className="mt-1 text-gray-600 dark:text-gray-400 max-h-20 overflow-y-auto">
157176
<strong>Variables:</strong> {formatVariableResults()}
158177
</div>
159178
)}
@@ -168,7 +187,7 @@ const SearchResultsDisplay = React.memo(({cohortsData, searchTerms, searchMode,
168187
Search matched <strong className="text-primary">{results.matchedCohorts.length}</strong> cohort{results.matchedCohorts.length !== 1 ? 's' : ''} metadata and <strong className="text-primary">{results.totalVariables}</strong> variable description{results.totalVariables !== 1 ? 's' : ''} in <strong className="text-primary">{cohortsWithVarMatches}</strong> cohort{cohortsWithVarMatches !== 1 ? 's' : ''}
169188
</span>
170189
{(results.matchedCohorts.length > 0 || results.totalVariables > 0) && (
171-
<div className="mt-1 text-xs text-gray-600 dark:text-gray-400 max-h-24 overflow-y-auto">
190+
<div className="mt-1 text-gray-600 dark:text-gray-400 max-h-24 overflow-y-auto">
172191
{results.matchedCohorts.length > 0 && (
173192
<div><strong>Studies metadata:</strong> {formatCohortResults()}</div>
174193
)}
@@ -202,7 +221,7 @@ const EquivalentVariableNames = React.memo(({cohortsData, searchTerms, searchMod
202221

203222
Object.entries(cohortsData).forEach(([_cohortId, cohortData]) => {
204223
Object.entries(cohortData.variables || {}).forEach(([varName, varData]) => {
205-
const nameMatches = matchesSearchTerms(varName, searchTerms, 'and');
224+
const nameMatches = matchesSearchTerms(varName, searchTerms, 'and') || matchesSearchTerms(varData.concept_name, searchTerms, 'and');
206225
if (nameMatches && varData.concept_code) {
207226
const code = varData.concept_code.trim().toUpperCase();
208227
if (code) {
@@ -787,7 +806,7 @@ export default function CohortsList() {
787806

788807
{/* Search Results Display */}
789808
{searchInput.trim() && (
790-
<div className="mt-2 p-2 bg-base-200 rounded-lg text-lg">
809+
<div className="mt-2 p-2 bg-base-200 rounded-lg text-base">
791810
<div className="flex items-start gap-3">
792811
<span className="text-gray-600 dark:text-gray-400 mt-0.5">🔍</span>
793812
<div className="flex-1">

frontend/src/utils/search.ts

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,29 @@ export interface SearchResult {
77
highlightedText?: string;
88
}
99

10+
/**
 * Normalize text for fuzzy matching: split camelCase and replace common separators
 * (underscore, hyphen, period, comma, em dash) with spaces.
 * The camelCase patterns are case-sensitive, so pass the text with its original casing.
 */
const normalizeText = (text: string): string =>
  text
    .replace(/([a-z])([A-Z])/g, '$1 $2') // camelCase: aB → a B
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') // acronym boundary: ABc → A Bc
    .replace(/[_\-.,—]/g, ' '); // same separator set the regex joiner below accepts

/**
 * Create a regex pattern for word boundary matching
 * Treats common punctuation as word boundaries so e.g. "body_mass" matches "body mass"
 *
 * The returned regex carries the `g` flag and is therefore stateful (`lastIndex`);
 * build a fresh one per use rather than sharing it across calls to `test`/`exec`.
 *
 * Note: a term made up only of separators/whitespace produces no parts, so the
 * resulting pattern is a bare `\b` (a zero-width match at any word boundary).
 *
 * @param term - Search term
 * @returns RegExp for word boundary matching
 */
const createWordBoundaryRegex = (term: string): RegExp => {
  // Normalize punctuation/camelCase in the term to spaces, then split into sub-terms
  const parts = normalizeText(term).split(/\s+/).filter(p => p.length > 0);
  // Escape special regex characters in each part
  const escapedParts = parts.map(p => p.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  // Join with a pattern that allows any run (possibly empty) of separators between parts
  const pattern = escapedParts.join('[_\\-.,—\\s]*');
  // Anchor at a word boundary; case insensitive
  return new RegExp(`\\b${pattern}`, 'gi');
};
2134

2235
/**
@@ -29,28 +42,31 @@ const createWordBoundaryRegex = (term: string): RegExp => {
2942
export const matchesSearchTerms = (text: string, searchTerms: string[], searchMode: 'or' | 'and' | 'exact' = 'or'): boolean => {
  // Nothing to filter on: empty text or no terms counts as a match
  if (!text || searchTerms.length === 0) return true;

  // Separator/camelCase-normalized copy of the text, tried in addition to the raw text
  const normalizedText = normalizeText(text);

  // Single-term predicate shared by all modes; a blank term is treated as a match
  const hits = (rawTerm: string): boolean => {
    const trimmed = rawTerm.trim();
    if (!trimmed) return true;
    // Fresh regex per term: it carries the `g` flag, so it must not be reused across terms
    const matcher = createWordBoundaryRegex(trimmed);
    return matcher.test(text) || matcher.test(normalizedText);
  };

  switch (searchMode) {
    case 'exact':
      // Exact phrase: re-join the terms and look for them as one phrase
      return hits(searchTerms.join(' '));
    case 'and':
      // Every term has to be found
      return searchTerms.every(term => hits(term));
    default:
      // 'or' (default): one matching term is enough
      return searchTerms.some(term => hits(term));
  }
};
5672

0 commit comments

Comments
 (0)