JuliaDocs · fingolfin · Oct 31, 2025 · Jun 25, 2025 · Jun 25, 2025 · Jun 25, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Version [v1.13.0] - 2025-06-19
 
+### Changed
+
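+* Improved the search tokenizer and custom trimmer so that Julia operators, macro names and module-qualified names are kept searchable, and ranked results whose title matches the query first. ([#2744])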
+
 ### Added
 
 * Added new type `RawHTMLHeadContent` to `HTML` format object, which allows to add raw HTML to the head of the HTML output, by passing it as a element in the `assets` keyword argument. ([#2726])
@@ -2129,6 +2133,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 [#2726]: https://github.com/JuliaDocs/Documenter.jl/issues/2726
 [#2729]: https://github.com/JuliaDocs/Documenter.jl/issues/2729
 [#2737]: https://github.com/JuliaDocs/Documenter.jl/issues/2737
+[#2744]: https://github.com/JuliaDocs/Documenter.jl/issues/2744
 [#2748]: https://github.com/JuliaDocs/Documenter.jl/issues/2748
 [#2750]: https://github.com/JuliaDocs/Documenter.jl/issues/2750
 [JuliaLang/julia#36953]: https://github.com/JuliaLang/julia/issues/36953

diff --git a/assets/html/js/search.js b/assets/html/js/search.js
@@ -192,10 +192,10 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
     processTerm: (term) => {
       let word = stopWords.has(term) ? null : term;
       if (word) {
-        // custom trimmer that doesn't strip @ and !, which are used in julia macro and function names
+        // custom trimmer that keeps `_` and the special characters `@!+-*/^&|%<>=:.`, which are used in Julia macro, function and operator names.
         word = word
-          .replace(/^[^a-zA-Z0-9@!]+/, "")
-          .replace(/[^a-zA-Z0-9@!]+$/, "");
+          .replace(/^[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+/, "")
+          .replace(/[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+$/, "");
 
         word = word.toLowerCase();
       }
@@ -204,7 +204,52 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
     },
     // add . as a separator, because otherwise "title": "Documenter.Anchors.add!", would not
     // find anything if searching for "add!", only for the entire qualification
-    tokenize: (string) => string.split(/[\s\-\.]+/),
+    tokenize: (string) => {
+      // Julia-aware tokenizer. Returns the unique, non-empty tokens of `string`:
+      // the matches of every pattern below, then the fragments left after
+      // splitting on whitespace/punctuation, then the "."-separated parts of
+      // those fragments.
+      // julia specific patterns
+      const patterns = [
+        // Module qualified names, incl. a trailing `!` (e.g., Base.sort, Anchors.add!)
+        /\b[A-Za-z0-9_]+(?:\.[A-Z][A-Za-z0-9_]*)*\.[a-z_][A-Za-z0-9_!]*/g,
+        // Macro calls (e.g., @time, @async)
+        /@[A-Za-z0-9_]*/g,
+        // Type parameters (e.g., Array{T,N}, Vector{Int})
+        /\b[A-Za-z0-9_]*\{[^}]+\}/g,
+        // Module qualified operators and names (e.g., Base.:+, Base.:^)
+        /\b[A-Za-z0-9_]*\.:[A-Za-z0-9_!+\-*/^&|%<>=.]+/g,
+        // ASCII operators as complete tokens (e.g., !=, <=, ||, ^, .=, ->)
+        /[!<>=+\-*/^&|%:.]+/g,
+        // Function signatures with type annotations (e.g., f(x::Int))
+        /\b[A-Za-z0-9_!]*\([^)]*::[^)]*\)/g,
+        // Numbers (integers, floats, scientific notation)
+        /\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b/g,
+      ];
+
+      // apply patterns in order of specificity; String.prototype.match with a
+      // `g` regex returns every match (or null) and leaves lastIndex at 0, so
+      // no regex state has to be reset by hand
+      const matched = patterns.flatMap(
+        (pattern) => string.match(pattern) ?? [],
+      );
+
+      // plain fragments, plus their "."-separated parts, so that searching for
+      // "add!" still finds "Documenter.Anchors.add!"
+      const fragments = string.split(/[\s\-,;()[\]{}]+/);
+      const parts = fragments.flatMap((fragment) => fragment.split("."));
+
+      // trim, drop empty strings and de-duplicate (a Set keeps the order of
+      // first insertion)
+      const unique = new Set();
+      for (const candidate of [...matched, ...fragments, ...parts]) {
+        const token = candidate.trim();
+        if (token) {
+          unique.add(token);
+        }
+      }
+      return [...unique];
+    },
     // options which will be applied during the search
     searchOptions: {
       prefix: true,
@@ -327,6 +372,35 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
     return result_div;
   }
 
+  // Re-ranks a search result: returns `result.score` plus a bonus tier that
+  // depends on how well `query` matches `result.title` (both are trimmed and
+  // lower-cased first, so stray whitespace or case cannot demote a match):
+  // exact title > title contains the query > title contains every query
+  // word > no title match (plain score).
+  function calculateCustomScore(result, query) {
+    const titleLower = result.title.trim().toLowerCase();
+    const queryLower = query.trim().toLowerCase();
+
+    // Tier 1 : Exact title match
+    if (titleLower === queryLower) {
+      return 10000 + result.score;
+    }
+
+    // Tier 2 : Title contains exact query; prefer matches at the beginning, but
+    // cap the penalty so a late match never drops below tier 3 at equal score
+    const position = titleLower.indexOf(queryLower);
+    if (position !== -1) {
+      return 5000 + result.score - Math.min(position * 10, 2999);
+    }
+
+    // Tier 3 : Every query word occurs somewhere in the title
+    if (queryLower.split(/\s+/).every((word) => titleLower.includes(word))) {
+      return 2000 + result.score;
+    }
+
+    return result.score;
+  }
+
   self.onmessage = function (e) {
     let query = e.data;
     let results = index.search(query, {
@@ -337,6 +411,15 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
       combineWith: "AND",
     });
 
+    // calculate custom scores for all results
+    results = results.map((result) => ({
+      ...result,
+      customScore: calculateCustomScore(result, query),
+    }));
+
+    // sort by custom score in descending order
+    results.sort((a, b) => b.customScore - a.customScore);
+
     // Pre-filter to deduplicate and limit to 200 per category to the extent
     // possible without knowing what the filters are.
     let filtered_results = [];

diff --git a/test/search/wrapper.js b/test/search/wrapper.js
@@ -21,12 +21,62 @@ const index = new MiniSearch({
     processTerm: (term) => {
         let word = stopWords.has(term) ? null : term;
         if (word) {
-            word = word.replace(/^[^a-zA-Z0-9@!]+/, "").replace(/[^a-zA-Z0-9@!]+$/, "");
-            word = word.toLowerCase();
+          // custom trimmer that keeps `_` and the special characters `@!+-*/^&|%<>=:.`, which are used in Julia macro, function and operator names.
+          word = word
+            .replace(/^[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+/, "")
+            .replace(/[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+$/, "");
+
+          word = word.toLowerCase();
         }
+
         return word ?? null;
     },
-    tokenize: (string) => string.split(/[\s\-\.]+/),
+    tokenize: (string) => {
+        // Julia-aware tokenizer. Returns the unique, non-empty tokens of
+        // `string`: the matches of every pattern below, then the fragments left
+        // after splitting on whitespace/punctuation, then the "."-separated
+        // parts of those fragments.
+        // julia specific patterns
+        const patterns = [
+          // Module qualified names, incl. a trailing `!` (e.g., Base.sort, Anchors.add!)
+          /\b[A-Za-z0-9_]+(?:\.[A-Z][A-Za-z0-9_]*)*\.[a-z_][A-Za-z0-9_!]*/g,
+          // Macro calls (e.g., @time, @async)
+          /@[A-Za-z0-9_]*/g,
+          // Type parameters (e.g., Array{T,N}, Vector{Int})
+          /\b[A-Za-z0-9_]*\{[^}]+\}/g,
+          // Module qualified operators and names (e.g., Base.:+, Base.:^)
+          /\b[A-Za-z0-9_]*\.:[A-Za-z0-9_!+\-*/^&|%<>=.]+/g,
+          // ASCII operators as complete tokens (e.g., !=, <=, ||, ^, .=, ->)
+          /[!<>=+\-*/^&|%:.]+/g,
+          // Function signatures with type annotations (e.g., f(x::Int))
+          /\b[A-Za-z0-9_!]*\([^)]*::[^)]*\)/g,
+          // Numbers (integers, floats, scientific notation)
+          /\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b/g,
+        ];
+
+        // apply patterns in order of specificity; String.prototype.match with a
+        // `g` regex returns every match (or null) and leaves lastIndex at 0, so
+        // no regex state has to be reset by hand
+        const matched = patterns.flatMap(
+          (pattern) => string.match(pattern) ?? [],
+        );
+
+        // plain fragments, plus their "."-separated parts, so that searching
+        // for "add!" still finds "Documenter.Anchors.add!"
+        const fragments = string.split(/[\s\-,;()[\]{}]+/);
+        const parts = fragments.flatMap((fragment) => fragment.split("."));
+
+        // trim, drop empty strings and de-duplicate (a Set keeps the order of
+        // first insertion)
+        const unique = new Set();
+        for (const candidate of [...matched, ...fragments, ...parts]) {
+          const token = candidate.trim();
+          if (token) {
+            unique.add(token);
+          }
+        }
+        return [...unique];
+    },
     searchOptions: { prefix: true, boost: { title: 100 }, fuzzy: 2 }
 });