Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ const CHARACTERS_TO_NORMALIZE = {
"\u00BE": "3/4", // Vulgar fraction three quarters
};

const escapeForRegex = str => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// These diacritics aren't considered as combining diacritics
// when searching in a document:
// https://searchfox.org/mozilla-central/source/intl/unicharutil/util/is_combining_diacritic.py.
Expand Down Expand Up @@ -78,7 +80,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.

const DIACRITICS_REG_EXP = /\p{M}+/gu;
const SPECIAL_CHARS_REG_EXP =
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
/([*+^${}()|[\]\\])|((?:[.?]|\p{P})+)|(\s+)|(\p{M})|(\p{L})/gu;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you want to not capture . or ? ?

const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;

Expand Down Expand Up @@ -708,6 +710,11 @@ class PDFFindController {
#convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this.#state;
let isUnicode = false;
const rawQuery = this._rawQuery ?? this.#state?.query;
const queryHasWhitespace =
typeof rawQuery === "string"
? /\s/.test(rawQuery)
: Array.isArray(rawQuery) && rawQuery.some(q => /\s/.test(q));
query = query.replaceAll(
SPECIAL_CHARS_REG_EXP,
(
Expand All @@ -718,16 +725,18 @@ class PDFFindController {
p4 /* diacritics */,
p5 /* letters */
) => {
// We don't need to use a \s for whitespaces since all the different
// kind of whitespaces are replaced by a single " ".
// We don't need \s because all whitespace is normalized to a single " ".

if (p1) {
// Escape characters like *+?... to not interfere with regexp syntax.
return `[ ]*\\${p1}[ ]*`;
// Escaped metacharacters like . * + ? ...
// Allow spaces around them ONLY if the user typed spaces.
return queryHasWhitespace ? `[ ]*\\${p1}[ ]*` : `\\${p1}`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you want to do that ?

}
if (p2) {
// Allow whitespaces around punctuation signs.
return `[ ]*${p2}[ ]*`;
// Punctuation: allow optional spaces ONLY if the user typed spaces.
// p2 is a *run* of punctuation; escape it as a whole.
const escapedRun = escapeForRegex(p2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You just have to replace the . and the ? by . and ?

return queryHasWhitespace ? `[ ]*${escapedRun}[ ]*` : `${escapedRun}`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again I don't understand why you want to make a difference between hasWhitespace and !hasWhitespace.

}
if (p3) {
// Replace spaces by \s+ to be sure to match any spaces.
Expand Down Expand Up @@ -906,7 +915,6 @@ class PDFFindController {
.then(
textContent => {
const strBuf = [];

for (const textItem of textContent.items) {
strBuf.push(textItem.str);
if (textItem.hasEOL) {
Expand Down