From 2bcfd0178431c7ebf9c342f149ccc173cc52f5f6 Mon Sep 17 00:00:00 2001 From: Shree Date: Tue, 14 Oct 2025 18:13:40 +0530 Subject: [PATCH 1/2] fix(find): respect spaces when query has no whitespace --- web/pdf_find_controller.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 19abf3fca7dad..5f5f00c5a49a2 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -708,6 +708,11 @@ class PDFFindController { #convertToRegExpString(query, hasDiacritics) { const { matchDiacritics } = this.#state; let isUnicode = false; + const rawQuery = this._rawQuery ?? this.#state?.query; + const queryHasWhitespace = + typeof rawQuery === "string" + ? /\s/.test(rawQuery) + : Array.isArray(rawQuery) && rawQuery.some(q => /\s/.test(q)); query = query.replaceAll( SPECIAL_CHARS_REG_EXP, ( @@ -718,16 +723,16 @@ class PDFFindController { p4 /* diacritics */, p5 /* letters */ ) => { - // We don't need to use a \s for whitespaces since all the different - // kind of whitespaces are replaced by a single " ". + // We don't need \s because all whitespace is normalized to a single " ". if (p1) { - // Escape characters like *+?... to not interfere with regexp syntax. - return `[ ]*\\${p1}[ ]*`; + // Escaped metacharacters like . * + ? ... + // Allow spaces around them ONLY if the user typed spaces. + return queryHasWhitespace ? `[ ]*\\${p1}[ ]*` : `\\${p1}`; } if (p2) { - // Allow whitespaces around punctuation signs. - return `[ ]*${p2}[ ]*`; + // Punctuation: allow optional spaces ONLY if the user typed spaces. + return queryHasWhitespace ? `[ ]*${p2}[ ]*` : `${p2}`; } if (p3) { // Replace spaces by \s+ to be sure to match any spaces. 
@@ -906,7 +911,6 @@ class PDFFindController { .then( textContent => { const strBuf = []; - for (const textItem of textContent.items) { strBuf.push(textItem.str); if (textItem.hasEOL) { From 1186ae81a7b71253ead6626691b3b7dfe119fae3 Mon Sep 17 00:00:00 2001 From: Shree Date: Thu, 16 Oct 2025 13:05:50 +0530 Subject: [PATCH 2/2] Handled this test case - o. w --- web/pdf_find_controller.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 5f5f00c5a49a2..724a55a24fc25 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -45,6 +45,8 @@ const CHARACTERS_TO_NORMALIZE = { "\u00BE": "3/4", // Vulgar fraction three quarters }; +const escapeForRegex = str => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + // These diacritics aren't considered as combining diacritics // when searching in a document: // https://searchfox.org/mozilla-central/source/intl/unicharutil/util/is_combining_diacritic.py. @@ -78,7 +80,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below. const DIACRITICS_REG_EXP = /\p{M}+/gu; const SPECIAL_CHARS_REG_EXP = - /([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu; + /([*+^${}()|[\]\\])|((?:[.?]|\p{P})+)|(\s+)|(\p{M})|(\p{L})/gu; const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u; const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u; @@ -732,7 +734,9 @@ class PDFFindController { } if (p2) { // Punctuation: allow optional spaces ONLY if the user typed spaces. - return queryHasWhitespace ? `[ ]*${p2}[ ]*` : `${p2}`; + // p2 is a *run* of punctuation; escape it as a whole. + const escapedRun = escapeForRegex(p2); + return queryHasWhitespace ? `[ ]*${escapedRun}[ ]*` : `${escapedRun}`; } if (p3) { // Replace spaces by \s+ to be sure to match any spaces.