ui: Improve fuzzy matching on camelcase strings by splitting them into tokens (#5054)

stevegolton · web-flow · commit bb324c7a6bb7 · 2026-03-09T14:51:04.000Z
Fuzzy search currently doesn't work well for CamelCase strings, e.g.
searching for 'case' does not match 'CamelCase', because MiniSearch
matches on tokens but only splits text on spaces and punctuation (e.g.
underscores).

This PR adds a new tokenizer that also splits on CamelCase boundaries,
e.g. 'CamelCase' is split into 'Camel' and 'Case'.
diff --git a/ui/src/base/fuzzy.ts b/ui/src/base/fuzzy.ts
@@ -42,7 +42,9 @@ export class FuzzyFinder<T> {
     const docs = items.map((item, i) => ({id: i, text: keyLookup(item)}));
     this.miniSearch = new MiniSearch({
       fields: ['text'],
+      tokenize: camelCaseTokenize,
       searchOptions: {
+        tokenize: camelCaseTokenize,
         // Allow 1 edit for short terms, ~20% for longer ones.
         fuzzy: (term: string) =>
           term.length <= 3 ? 1 : Math.ceil(term.length * 0.2),
@@ -73,6 +75,24 @@ export class FuzzyFinder<T> {
   }
 }
 
+// Tokenizes text on non-alphanumeric separators AND camelCase boundaries, e.g.
+// "dev.perfetto.LiveMemory" -> ["dev", "perfetto", "Live", "Memory"], so that
+// a query such as "memory" can match "LiveMemory". Returns [] for empty text.
+function camelCaseTokenize(text: string): string[] {
+  // Coarse split on runs outside [a-zA-Z0-9]; non-ASCII letters also separate.
+  const coarseTokens = text.split(/[^a-zA-Z0-9]+/).filter(Boolean);
+  const tokens: string[] = [];
+  for (const token of coarseTokens) {
+    // Fine split at lower->Upper ("liveMemory" -> ["live", "Memory"]) and
+    // before the last capital of an uppercase run that is followed by a
+    // lowercase letter ("XMLParser" -> ["XML", "Parser"]). No split is ever
+    // placed next to a digit ("TraceV2" -> ["Trace", "V2"]).
+    const parts = token.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/);
+    tokens.push(...parts);
+  }
+  return tokens;
+}
+
 // Given a query (possibly multi-word) and candidate text, compute highlight
 // segments. Each query token is first tried as a substring match, then falls
 // back to sequential character matching.
diff --git a/ui/src/base/fuzzy_unittest.ts b/ui/src/base/fuzzy_unittest.ts
@@ -83,6 +83,38 @@ describe('FuzzyFinder', () => {
   });
 });
 
+describe('FuzzyFinder camelCase tokenization', () => {
+  const items = [
+    'dev.perfetto.LiveMemory',
+    'dev.perfetto.RecordTraceV2',
+    'com.android.XMLParser',
+  ];
+  const finder = new FuzzyFinder(items, (x) => x); // each item is its own key
+
+  it('finds camelCase sub-word', () => {
+    const result = finder.find('memory'); // "Memory" is the tail of LiveMemory
+    expect(result).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({item: 'dev.perfetto.LiveMemory'}),
+      ]),
+    );
+  });
+
+  it('finds dotted segment', () => {
+    const result = finder.find('perfetto');
+    expect(result.length).toBeGreaterThanOrEqual(2); // two items hold "perfetto"
+  });
+
+  it('finds uppercase acronym split', () => {
+    const result = finder.find('parser'); // needs "XMLParser" -> "XML", "Parser"
+    expect(result).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({item: 'com.android.XMLParser'}),
+      ]),
+    );
+  });
+});
+
 test('fuzzyMatch', () => {
   expect(fuzzyMatch('foo bar baz', 'foo')).toEqual({
     matches: true,