Skip to content

Commit bf0af4f

Browse files
committed
Fixed various regexes by making them more specific
To use atomic groups, the standard-library "re" module was replaced with the third-party "regex" module (now listed in install_requires). The list of type regexes in stdlib.py was replaced with a single regex, since the "regex" module supports "^" inside lookbehind assertions.
1 parent cfb9444 commit bf0af4f

File tree

12 files changed

+124
-104
lines changed

12 files changed

+124
-104
lines changed

wpiformat/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
zip_safe=True,
7171
setup_requires=["pytest-runner"],
7272
tests_require=["pytest"],
73-
install_requires=["yapf"],
73+
install_requires=["regex", "yapf"],
7474
license="BSD License",
7575
classifiers=[
7676
"Development Status :: 5 - Production/Stable",

wpiformat/wpiformat/bracecomment.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
namespace declarations matches that of the declaration name.
33
"""
44

5-
import re
5+
import regex
66

77
from wpiformat.task import Task
88

@@ -19,7 +19,7 @@ def run_pipeline(self, config_file, name, lines):
1919
brace_prefix = "(?P<prefix>(extern|namespace)\s+[\w\"]*)"
2020
brace_postfix = "\s*/(/|\*)[^\r\n]*"
2121

22-
brace_regex = re.compile(
22+
brace_regex = regex.compile(
2323
"(" + brace_prefix + "\s*)?{|" # "{" with optional prefix
2424
"\}(" + brace_postfix + ")?") # "}" with optional comment postfix
2525

wpiformat/wpiformat/cidentlist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""This task replaces empty C identifier lists "()" with "(void)"."""
22

3-
import re
3+
import regex
44

55
from wpiformat.task import Task
66

@@ -34,7 +34,7 @@ def run_pipeline(self, config_file, name, lines):
3434
extern_str = "(?P<ext_decl>extern \"C(\+\+)?\")\s+(?P<ext_brace>\{)?|"
3535
braces_str = "\{|\}|;|def\s+\w+|\w+\s+\w+\s*(?P<paren>\(\))"
3636
postfix_str = "(?=\s*(;|\{))"
37-
token_regex = re.compile(extern_str + braces_str + postfix_str)
37+
token_regex = regex.compile(extern_str + braces_str + postfix_str)
3838

3939
EXTRA_POP_OFFSET = 2
4040

wpiformat/wpiformat/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""This class is for handling wpiformat config files."""
22

33
import os
4-
import re
4+
import regex
55
import sys
66

77

@@ -76,9 +76,9 @@ def regex(self, *args):
7676

7777
if len(group_contents) == 0:
7878
# If regex string is empty, make regex match nothing
79-
return re.compile("a^")
79+
return regex.compile("a^")
8080
else:
81-
return re.compile("|".join(group_contents))
81+
return regex.compile("|".join(group_contents))
8282

8383
def is_c_file(self, name):
8484
"""Returns True if file is either C header or C source file.

wpiformat/wpiformat/cpplint.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,16 @@
4848
import itertools
4949
import math # for log
5050
import os
51-
import re
51+
import regex
5252
import sre_compile
5353
import string
5454
import sys
5555

5656
# if empty, use defaults
57-
_header_regex = re.compile("a^")
57+
_header_regex = regex.compile("a^")
5858

5959
# if empty, use defaults
60-
_source_regex = re.compile("a^")
60+
_source_regex = regex.compile("a^")
6161

6262

6363
# Files which match the regex are considered to be header
@@ -191,7 +191,7 @@ def IsSourceFile(filename):
191191
# hard-coded international strings, which belong in a separate i18n file.
192192

193193
# Type names
194-
_TYPES = re.compile(
194+
_TYPES = regex.compile(
195195
r'^(?:'
196196
# [dcl.type.simple]
197197
r'(char(16_t|32_t)?)|wchar_t|'
@@ -208,11 +208,11 @@ def IsSourceFile(filename):
208208
# - Anything not following google file name conventions (containing an
209209
# uppercase character, such as Python.h or nsStringAPI.h, for example).
210210
# - Lua headers.
211-
_THIRD_PARTY_HEADERS_PATTERN = re.compile(
211+
_THIRD_PARTY_HEADERS_PATTERN = regex.compile(
212212
r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
213213

214214
# Pattern that matches only complete whitespace, possibly across multiple lines.
215-
_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)
215+
_EMPTY_CONDITIONAL_BODY_PATTERN = regex.compile(r'^\s*$', regex.DOTALL)
216216

217217
# Alternative tokens and their replacements. For full list, see section 2.5
218218
# Alternative tokens [lex.digraph] in the C++ standard.
@@ -238,7 +238,7 @@ def IsSourceFile(filename):
238238
#
239239
# False positives include C-style multi-line comments and multi-line strings
240240
# but those have always been troublesome for cpplint.
241-
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
241+
_ALT_TOKEN_REPLACEMENT_PATTERN = regex.compile(
242242
r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
243243

244244

@@ -249,12 +249,12 @@ def IsSourceFile(filename):
249249
_BLOCK_ASM = 3 # The whole block is an inline assembly block
250250

251251
# Match start of assembly blocks
252-
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
252+
_MATCH_ASM = regex.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
253253
r'(?:\s+(volatile|__volatile__))?'
254254
r'\s*[{(]')
255255

256256
# Match strings that indicate we're working on a C (not C++) file.
257-
_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|'
257+
_SEARCH_C_FILE = regex.compile(r'\b(?:LINT_C_FILE|'
258258
r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))')
259259

260260
_regexp_compile_cache = {}
@@ -532,7 +532,7 @@ def Error(filename, linenum, category, confidence, message):
532532
sys.stderr.write(final_message)
533533

534534
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
535-
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
535+
_RE_PATTERN_CLEANSE_LINE_ESCAPES = regex.compile(
536536
r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
537537
# Match a single C style comment on the same line.
538538
_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
@@ -544,7 +544,7 @@ def Error(filename, linenum, category, confidence, message):
544544
# end of the line. Otherwise, we try to remove spaces from the right side,
545545
# if this doesn't work we try on left side but only if there's a non-character
546546
# on the right.
547-
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
547+
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = regex.compile(
548548
r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
549549
_RE_PATTERN_C_COMMENTS + r'\s+|' +
550550
r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
@@ -1117,7 +1117,7 @@ def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
11171117

11181118
# Matches invalid increment: *count++, which moves pointer instead of
11191119
# incrementing a value.
1120-
_RE_PATTERN_INVALID_INCREMENT = re.compile(
1120+
_RE_PATTERN_INVALID_INCREMENT = regex.compile(
11211121
r'^\s*\*\w+(\+\+|--);')
11221122

11231123

@@ -1309,7 +1309,7 @@ def CheckEnd(self, filename, clean_lines, linenum, error):
13091309
if self.name:
13101310
# Named namespace
13111311
if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
1312-
re.escape(self.name) + r'[\*/\.\\\s]*$'),
1312+
regex.escape(self.name) + r'[\*/\.\\\s]*$'),
13131313
line):
13141314
error(filename, linenum, 'readability/namespace', 5,
13151315
'Namespace should be terminated with "// namespace %s"' %
@@ -1766,7 +1766,7 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum,
17661766
explicit_constructor_match = Match(
17671767
r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*'
17681768
r'\(((?:[^()]|\([^()]*\))*)\)'
1769-
% re.escape(base_classname),
1769+
% regex.escape(base_classname),
17701770
line)
17711771

17721772
if explicit_constructor_match:
@@ -1810,7 +1810,7 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum,
18101810
copy_constructor = bool(
18111811
onearg_constructor and
18121812
Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&'
1813-
% re.escape(base_classname), constructor_args[0].strip()))
1813+
% regex.escape(base_classname), constructor_args[0].strip()))
18141814

18151815
if (not is_marked_explicit and
18161816
onearg_constructor and
@@ -1912,7 +1912,7 @@ def CheckForFunctionLengths(filename, clean_lines, linenum,
19121912
function_state.Count() # Count non-blank/non-comment lines.
19131913

19141914

1915-
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
1915+
_RE_PATTERN_TODO = regex.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
19161916

19171917

19181918
def CheckComment(line, filename, linenum, next_line_start, error):
@@ -1928,7 +1928,7 @@ def CheckComment(line, filename, linenum, next_line_start, error):
19281928
commentpos = line.find('//')
19291929
if commentpos != -1:
19301930
# Check if the // may be in quotes. If so, ignore it
1931-
if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0:
1931+
if regex.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0:
19321932
# Checks for common mistakes in TODO comments.
19331933
comment = line[commentpos:]
19341934
match = _RE_PATTERN_TODO.match(comment)
@@ -2088,7 +2088,7 @@ def _IsType(clean_lines, nesting_state, expr):
20882088
# Try a bit harder to match templated types. Walk up the nesting
20892089
# stack until we find something that resembles a typename
20902090
# declaration for what we are looking for.
2091-
typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
2091+
typename_pattern = (r'\b(?:typename|class|struct)\s+' + regex.escape(token) +
20922092
r'\b')
20932093
block_index = len(nesting_state.stack) - 1
20942094
while block_index >= 0:
@@ -2664,13 +2664,13 @@ def CheckStyle(filename, clean_lines, linenum, is_header, nesting_state,
26642664
CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
26652665

26662666

2667-
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
2667+
_RE_PATTERN_INCLUDE = regex.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
26682668
# Matches the first component of a filename delimited by -s and _s. That is:
26692669
# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
26702670
# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
26712671
# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
26722672
# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
2673-
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2673+
_RE_FIRST_COMPONENT = regex.compile(r'^[^-_.]+')
26742674

26752675

26762676
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
@@ -2730,7 +2730,7 @@ def _GetTextInside(text, start_pattern):
27302730
closing_punctuation = set(itervalues(matching_punctuation))
27312731

27322732
# Find the position to start extracting text.
2733-
match = re.search(start_pattern, text, re.M)
2733+
match = regex.search(start_pattern, text, regex.M)
27342734
if not match: # start_pattern not found in text.
27352735
return None
27362736
start_position = match.end(0)
@@ -2774,7 +2774,7 @@ def _GetTextInside(text, start_pattern):
27742774
r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
27752775
r'::)+')
27762776
# A call-by-reference parameter ends with '& identifier'.
2777-
_RE_PATTERN_REF_PARAM = re.compile(
2777+
_RE_PATTERN_REF_PARAM = regex.compile(
27782778
r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
27792779
r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
27802780
# A call-by-const-reference parameter either ends with 'const& identifier'
@@ -2868,8 +2868,8 @@ def CheckLanguage(filename, clean_lines, linenum, is_header,
28682868
if printf_args:
28692869
match = Match(r'([\w.\->()]+)$', printf_args)
28702870
if match and match.group(1) != '__VA_ARGS__':
2871-
function_name = re.search(r'\b((?:string)?printf)\s*\(',
2872-
line, re.I).group(1)
2871+
function_name = regex.search(r'\b((?:string)?printf)\s*\(',
2872+
line, regex.I).group(1)
28732873
error(filename, linenum, 'runtime/printf', 4,
28742874
'Potential format string bug. Do %s("%%s", %s) instead.'
28752875
% (function_name, match.group(1)))
@@ -2888,7 +2888,7 @@ def CheckLanguage(filename, clean_lines, linenum, is_header,
28882888
# Split the size using space and arithmetic operators as delimiters.
28892889
# If any of the resulting tokens are not compile time constants then
28902890
# report the error.
2891-
tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
2891+
tokens = regex.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
28922892
is_const = True
28932893
skip_next = False
28942894
for tok in tokens:
@@ -3227,15 +3227,15 @@ def ExpectingFunctionArgs(clean_lines, linenum):
32273227
('<utility>', ('forward', 'make_pair', 'move', 'swap')),
32283228
)
32293229

3230-
_RE_PATTERN_STRING = re.compile(r'\bstring\b')
3230+
_RE_PATTERN_STRING = regex.compile(r'\bstring\b')
32313231

32323232
_re_pattern_headers_maybe_templates = []
32333233
for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
32343234
for _template in _templates:
32353235
# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
32363236
# type::max().
32373237
_re_pattern_headers_maybe_templates.append(
3238-
(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
3238+
(regex.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
32393239
_template,
32403240
_header))
32413241

@@ -3244,7 +3244,7 @@ def ExpectingFunctionArgs(clean_lines, linenum):
32443244
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
32453245
for _template in _templates:
32463246
_re_pattern_templates.append(
3247-
(re.compile(r'(\<|\b)' + _template + r'\s*\<'),
3247+
(regex.compile(r'(\<|\b)' + _template + r'\s*\<'),
32483248
_template + '<>',
32493249
_header))
32503250

@@ -3422,7 +3422,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
34223422
'Add #include ' + required_header_unstripped + ' for ' + template)
34233423

34243424

3425-
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
3425+
_RE_PATTERN_EXPLICIT_MAKEPAIR = regex.compile(r'\bmake_pair\s*<')
34263426

34273427

34283428
def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
@@ -3694,13 +3694,13 @@ def ParseArguments(args):
36943694
elif opt == '--srcs':
36953695
global _source_regex
36963696
try:
3697-
_source_regex = re.compile(val)
3697+
_source_regex = regex.compile(val)
36983698
except ValueError:
36993699
PrintUsage('Extensions must be comma seperated list.')
37003700
elif opt == '--headers':
37013701
global _header_regex
37023702
try:
3703-
_header_regex = re.compile(val)
3703+
_header_regex = regex.compile(val)
37043704
except ValueError:
37053705
PrintUsage('Extensions must be comma seperated list.')
37063706

wpiformat/wpiformat/includeguard.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
import os
6-
import re
6+
import regex
77

88
from enum import Enum
99
from wpiformat.task import Task
@@ -26,8 +26,8 @@ def run_pipeline(self, config_file, name, lines):
2626
output_list = lines_list
2727

2828
state = State.FINDING_IFNDEF
29-
ifndef_regex = re.compile("#ifndef \w+", re.ASCII)
30-
define_regex = re.compile("#define \w+", re.ASCII)
29+
ifndef_regex = regex.compile("#ifndef \w+", regex.ASCII)
30+
define_regex = regex.compile("#define \w+", regex.ASCII)
3131

3232
if_preproc_count = 0
3333
for i in range(len(lines_list)):
@@ -89,8 +89,9 @@ def make_include_guard(self, config_file, name):
8989
for include_root in include_roots:
9090
if name.startswith(include_root):
9191
guard_path += name[len(include_root):]
92-
return re.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"
92+
return regex.sub("[^a-zA-Z0-9]", "_",
93+
guard_path).upper() + "_"
9394

9495
# No include guard roots matched, so append full name
9596
guard_path += name
96-
return re.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"
97+
return regex.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"

wpiformat/wpiformat/includeorder.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""This task sorts C/C++ includes."""
22

33
import os
4-
import re
4+
import regex
55

66
from wpiformat.task import Task
77

@@ -34,7 +34,7 @@ def __init__(self):
3434
]
3535

3636
# Header type 1: C system headers
37-
self.c_sys_regex = re.compile("<[a-z][A-Za-z0-9/_-]*\.h>")
37+
self.c_sys_regex = regex.compile("<[a-z][A-Za-z0-9/_-]*\.h>")
3838

3939
# Header type 2: C++ standard library headers
4040
self.cpp_std = [
@@ -61,13 +61,13 @@ def __init__(self):
6161
#
6262
# Header type 4: Project headers
6363
# They use double quotes (all other headers)
64-
self.header_regex = re.compile("(?P<comment>//\s*)?"
65-
"\#include\s*"
66-
"(?P<header>"
67-
"(?P<open_bracket><|\")"
68-
"(?P<name>.*)"
69-
"(?P<close_bracket>>|\"))"
70-
"(?P<postfix>.*)")
64+
self.header_regex = regex.compile("(?P<comment>//\s*)?"
65+
"\#include\s*"
66+
"(?P<header>"
67+
"(?P<open_bracket><|\")"
68+
"(?P<name>[^>\"]*)"
69+
"(?P<close_bracket>>|\"))"
70+
"(?P<postfix>.*)$")
7171

7272
def should_process_file(self, config_file, name):
7373
return config_file.is_c_file(name) or config_file.is_cpp_file(name)
@@ -343,7 +343,7 @@ def run_pipeline(self, config_file, name, lines):
343343
"includeOtherLibs", "includeProject"
344344
]:
345345
regex_str = config_file.regex(group)
346-
self.override_regexes.append(re.compile(regex_str))
346+
self.override_regexes.append(regex.compile(regex_str))
347347

348348
self.linesep = Task.get_linesep(lines)
349349

0 commit comments

Comments
 (0)