Skip to content

Commit 5032e2d

Browse files
authored
Merge pull request #127 from azmeuk/warnings
Fixed a few regex warnings, thanks azmeuk!
2 parents 471d89d + 6c1c639 commit 5032e2d

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

readability/debug.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def describe(node, depth=1):
4141
return parent + describe_node(node)
4242

4343

44-
RE_COLLAPSE_WHITESPACES = re.compile('\s+', re.U)
44+
RE_COLLAPSE_WHITESPACES = re.compile(r'\s+', re.U)
4545

4646

4747
def text_content(elem, length=40):

readability/encoding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def get_encoding(page):
5151

5252
# Fallback to chardet if declared encodings fail
5353
# Remove all HTML tags, and leave only text for chardet
54-
text = re.sub(b'(\s*</?[^>]*>)+\s*', b' ', page).strip()
54+
text = re.sub(br'(\s*</?[^>]*>)+\s*', b' ', page).strip()
5555
enc = 'utf-8'
5656
if len(text) < 10:
5757
return enc # can't guess

readability/readability.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -23,17 +23,17 @@
2323
log = logging.getLogger("readability.readability")
2424

2525
REGEXES = {
26-
'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter', re.I),
27-
'okMaybeItsACandidateRe': re.compile('and|article|body|column|main|shadow', re.I),
28-
'positiveRe': re.compile('article|body|content|entry|hentry|main|page|pagination|post|text|blog|story', re.I),
29-
'negativeRe': re.compile('combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget', re.I),
30-
'divToPElementsRe': re.compile('<(a|blockquote|dl|div|img|ol|p|pre|table|ul)', re.I),
31-
#'replaceBrsRe': re.compile('(<br[^>]*>[ \n\r\t]*){2,}',re.I),
32-
#'replaceFontsRe': re.compile('<(\/?)font[^>]*>',re.I),
33-
#'trimRe': re.compile('^\s+|\s+$/'),
34-
#'normalizeRe': re.compile('\s{2,}/'),
35-
#'killBreaksRe': re.compile('(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
36-
'videoRe': re.compile('https?:\/\/(www\.)?(youtube|vimeo)\.com', re.I),
26+
'unlikelyCandidatesRe': re.compile(r'combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter', re.I),
27+
'okMaybeItsACandidateRe': re.compile(r'and|article|body|column|main|shadow', re.I),
28+
'positiveRe': re.compile(r'article|body|content|entry|hentry|main|page|pagination|post|text|blog|story', re.I),
29+
'negativeRe': re.compile(r'combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget', re.I),
30+
'divToPElementsRe': re.compile(r'<(a|blockquote|dl|div|img|ol|p|pre|table|ul)', re.I),
31+
#'replaceBrsRe': re.compile(r'(<br[^>]*>[ \n\r\t]*){2,}',re.I),
32+
#'replaceFontsRe': re.compile(r'<(\/?)font[^>]*>',re.I),
33+
#'trimRe': re.compile(r'^\s+|\s+$/'),
34+
#'normalizeRe': re.compile(r'\s{2,}/'),
35+
#'killBreaksRe': re.compile(r'(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
36+
'videoRe': re.compile(r'https?:\/\/(www\.)?(youtube|vimeo)\.com', re.I),
3737
#skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
3838
}
3939

@@ -57,8 +57,8 @@ def clean(text):
5757
# Many spaces make the following regexes run forever
5858
text = re.sub(r'\s{255,}', ' ' * 255, text)
5959

60-
text = re.sub('\s*\n\s*', '\n', text)
61-
text = re.sub('\t|[ \t]{2,}', ' ', text)
60+
text = re.sub(r'\s*\n\s*', '\n', text)
61+
text = re.sub(r'\t|[ \t]{2,}', ' ', text)
6262
return text.strip()
6363

6464

@@ -271,7 +271,7 @@ def get_article(self, candidates, best_candidate, html_partial=False):
271271
append = True
272272
elif node_length <= 80 \
273273
and link_density == 0 \
274-
and re.search('\.( |$)', node_content):
274+
and re.search(r'\.( |$)', node_content):
275275
append = True
276276

277277
if append:

0 commit comments

Comments
 (0)