|
| 1 | +From 2a04eb936ccb60af4dfdd523c68b99e0d43e373f Mon Sep 17 00:00:00 2001 |
| 2 | +From: Jamison Lahman <[email protected]>
| 3 | +Date: Sat, 29 Jul 2023 16:33:16 -0700 |
| 4 | +Subject: [PATCH] [chore] fix "SyntaxError: invalid escape sequence" |
| 5 | + |
| 6 | +--- |
| 7 | + src/whoosh/analysis/filters.py | 4 ++-- |
| 8 | + src/whoosh/analysis/intraword.py | 6 +++--- |
| 9 | + src/whoosh/lang/paicehusk.py | 2 +- |
| 10 | + src/whoosh/lang/porter2.py | 2 +- |
| 11 | + tests/test_analysis.py | 2 +- |
| 12 | + 5 files changed, 8 insertions(+), 8 deletions(-) |
| 13 | + |
| 14 | +diff --git a/src/whoosh/analysis/filters.py b/src/whoosh/analysis/filters.py |
| 15 | +index 3b6f5b47..5cea1480 100644 |
| 16 | +--- a/src/whoosh/analysis/filters.py |
| 17 | ++++ b/src/whoosh/analysis/filters.py |
| 18 | +@@ -53,7 +53,7 @@ |
| 19 | + \\S+? # URL body |
| 20 | + (?=\\s|[.]\\s|$|[.]$) # Stop at space/end, or a dot followed by space/end |
| 21 | + ) | ( # or... |
| 22 | +- \w+([:.]?\w+)* # word characters, with opt. internal colons/dots |
| 23 | ++ \\w+([:.]?\\w+)* # word characters, with opt. internal colons/dots |
| 24 | + ) |
| 25 | + """, verbose=True) |
| 26 | + |
| 27 | +@@ -145,7 +145,7 @@ def __call__(self, tokens): |
| 28 | + |
| 29 | + |
| 30 | + class TeeFilter(Filter): |
| 31 | +- """Interleaves the results of two or more filters (or filter chains). |
| 32 | ++ r"""Interleaves the results of two or more filters (or filter chains). |
| 33 | + |
| 34 | + NOTE: because it needs to create copies of each token for each sub-filter, |
| 35 | + this filter is quite slow. |
| 36 | +diff --git a/src/whoosh/analysis/intraword.py b/src/whoosh/analysis/intraword.py |
| 37 | +index 601423e1..9c1b8831 100644 |
| 38 | +--- a/src/whoosh/analysis/intraword.py |
| 39 | ++++ b/src/whoosh/analysis/intraword.py |
| 40 | +@@ -34,7 +34,7 @@ |
| 41 | + |
| 42 | + |
| 43 | + class CompoundWordFilter(Filter): |
| 44 | +- """Given a set of words (or any object with a ``__contains__`` method), |
| 45 | ++ r"""Given a set of words (or any object with a ``__contains__`` method), |
| 46 | + break any tokens in the stream that are composites of words in the word set |
| 47 | + into their individual parts. |
| 48 | + |
| 49 | +@@ -272,7 +272,7 @@ class IntraWordFilter(Filter): |
| 50 | + >>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True) |
| 51 | + >>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False) |
| 52 | + >>> iwf = MultiFilter(index=iwf_i, query=iwf_q) |
| 53 | +- >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter() |
| 54 | ++ >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter() |
| 55 | + |
| 56 | + (See :class:`MultiFilter`.) |
| 57 | + """ |
| 58 | +@@ -282,7 +282,7 @@ class IntraWordFilter(Filter): |
| 59 | + __inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool, |
| 60 | + mergewords=bool, mergenums=bool) |
| 61 | + |
| 62 | +- def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"), |
| 63 | ++ def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"), |
| 64 | + splitwords=True, splitnums=True, |
| 65 | + mergewords=False, mergenums=False): |
| 66 | + """ |
| 67 | +diff --git a/src/whoosh/lang/paicehusk.py b/src/whoosh/lang/paicehusk.py |
| 68 | +index 481c3e40..6aee9066 100644 |
| 69 | +--- a/src/whoosh/lang/paicehusk.py |
| 70 | ++++ b/src/whoosh/lang/paicehusk.py |
| 71 | +@@ -30,7 +30,7 @@ class PaiceHuskStemmer(object): |
| 72 | + (?P<cont>[.>]) |
| 73 | + """, re.UNICODE | re.VERBOSE) |
| 74 | + |
| 75 | +- stem_expr = re.compile("^\w+", re.UNICODE) |
| 76 | ++ stem_expr = re.compile(r"^\w+", re.UNICODE) |
| 77 | + |
| 78 | + def __init__(self, ruletable): |
| 79 | + """ |
| 80 | +diff --git a/src/whoosh/lang/porter2.py b/src/whoosh/lang/porter2.py |
| 81 | +index 4c740473..4d669752 100644 |
| 82 | +--- a/src/whoosh/lang/porter2.py |
| 83 | ++++ b/src/whoosh/lang/porter2.py |
| 84 | +@@ -64,7 +64,7 @@ def remove_initial_apostrophe(word): |
| 85 | + def capitalize_consonant_ys(word): |
| 86 | + if word.startswith('y'): |
| 87 | + word = 'Y' + word[1:] |
| 88 | +- return ccy_exp.sub('\g<1>Y', word) |
| 89 | ++ return ccy_exp.sub(r'\g<1>Y', word) |
| 90 | + |
| 91 | + |
| 92 | + def step_0(word): |
| 93 | +diff --git a/tests/test_analysis.py b/tests/test_analysis.py |
| 94 | +index c46a70db..425415f4 100644 |
| 95 | +--- a/tests/test_analysis.py |
| 96 | ++++ b/tests/test_analysis.py |
| 97 | +@@ -520,7 +520,7 @@ def test_stop_lang(): |
| 98 | + |
| 99 | + |
| 100 | + def test_issue358(): |
| 101 | +- t = analysis.RegexTokenizer("\w+") |
| 102 | ++ t = analysis.RegexTokenizer(r"\w+") |
| 103 | + with pytest.raises(analysis.CompositionError): |
| 104 | + _ = t | analysis.StandardAnalyzer() |
| 105 | + |
0 commit comments