Skip to content

Commit b256079

Browse files
committed
cidentlist.py and usingdeclaration.py now process comments in strings properly
Previously, comments within strings were resulting in braces after the string being skipped, which broke the cidentlist.py stack.
1 parent 47283df commit b256079

File tree

3 files changed

+215
-73
lines changed

3 files changed

+215
-73
lines changed

wpiformat/test/test_cidentlist.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ def test_cidentlist():
165165
"ES_Event Elevator_Service_Run(ES_Event event) {" + os.linesep + \
166166
"#ifdef USE_TATTLETALE" + os.linesep + \
167167
" ES_Tail(); // trace call stack end" + os.linesep + \
168-
"#endif" + os.linesep)
168+
"#endif" + os.linesep + \
169+
"}" + os.linesep)
169170
test.add_latest_input_as_output(True)
170171

171172
test.add_input("./Timer.hpp",
@@ -212,4 +213,70 @@ def test_cidentlist():
212213
"}" + os.linesep)
213214
test.add_latest_input_as_output(True)
214215

216+
# Ensure nested comments don't mess up brace stack
217+
test.add_input("./Test.cpp", "// { /* */ }" + os.linesep)
218+
test.add_latest_input_as_output(True)
219+
test.add_input("./Test.cpp", "{ // /* */ }" + os.linesep)
220+
test.add_latest_input_as_output(False)
221+
test.add_input("./Test.cpp", "{ /* // */ }" + os.linesep)
222+
test.add_latest_input_as_output(True)
223+
test.add_input("./Test.cpp", "{ /* */ // }" + os.linesep)
224+
test.add_latest_input_as_output(False)
225+
test.add_input("./Test.cpp", "{ // /* // */ }" + os.linesep)
226+
test.add_latest_input_as_output(False)
227+
228+
# Ensure popping too many braces doesn't crash
229+
test.add_input("./Test.cpp", "}" + os.linesep)
230+
test.add_latest_input_as_output(False)
231+
232+
# Ensure comments inside quoted string don't mess up brace stack
233+
test.add_input("./Test.cpp",
234+
"void func() {" + os.linesep + \
235+
" // \"//\"" + os.linesep + \
236+
" if (!query.startswith(\"//\")) {" + os.linesep + \
237+
" return;" + os.linesep + \
238+
" }" + os.linesep + \
239+
"}" + os.linesep)
240+
test.add_latest_input_as_output(True)
241+
242+
# Ensure braces in double quotes don't mess up brace stack
243+
test.add_input("./Test.cpp", "void func() { std::cout << '{'; }")
244+
test.add_latest_input_as_output(True)
245+
246+
# Ensure braces in single quotes don't mess up brace stack
247+
test.add_input("./Test.cpp", "void func() { std::cout << \"{\"; }")
248+
test.add_latest_input_as_output(True)
249+
250+
# Ensure single quote within double quotes doesn't mess up brace stack
251+
test.add_input("./Test.cpp", "void func() { std::cout << \"'\"; }")
252+
test.add_latest_input_as_output(True)
253+
254+
# Ensure double quote within single quotes doesn't mess up brace stack
255+
test.add_input("./Test.cpp", "void func() { std::cout << '\"'; }")
256+
test.add_latest_input_as_output(True)
257+
258+
# Ensure escaped double quote doesn't mess up brace stack
259+
test.add_input("./Test.cpp", "void func() { std::cout << \"\\\"\"; }")
260+
test.add_latest_input_as_output(True)
261+
262+
# Ensure escaped single quote doesn't mess up brace stack
263+
test.add_input("./Test.cpp", "void func() { std::cout << '\\''; }")
264+
test.add_latest_input_as_output(True)
265+
266+
# Ensure escaped backslash isn't considered as an escaped single quote
267+
test.add_input("./Test.cpp", "void func() { std::cout << '\\\\'; }")
268+
test.add_latest_input_as_output(True)
269+
270+
# Test logic for deduplicating braces within #ifdef
271+
test.add_input("./Test.cpp",
272+
"void func() {" + os.linesep + \
273+
"#ifdef _WIN32" + os.linesep + \
274+
" if (errno == WSAEWOULDBLOCK) {" + os.linesep + \
275+
"#else" + os.linesep + \
276+
" if (errno == EWOULDBLOCK) {" + os.linesep + \
277+
"#endif" + os.linesep + \
278+
" }" + os.linesep + \
279+
"}" + os.linesep)
280+
test.add_latest_input_as_output(True)
281+
215282
test.run(OutputType.FILE)

wpiformat/wpiformat/cidentlist.py

Lines changed: 105 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
class CIdentList(Task):
99

10+
def __print_failure(self, name):
11+
print("Error: " + name + ": unmatched curly braces when scanning for "
12+
"C identifier lists")
13+
1014
def should_process_file(self, config_file, name):
1115
return config_file.is_c_file(name) or config_file.is_cpp_file(name)
1216

@@ -31,12 +35,16 @@ def run_pipeline(self, config_file, name, lines):
3135
#
3236
# "def\\s+\w+" matches preprocessor directives "#ifdef" and "#ifndef" so
3337
# their contents aren't used as a return type.
38+
preproc_str = "#else|#endif|"
3439
comment_str = "/\*|\*/|//|" + linesep + "|"
40+
string_str = r"\\\\|\\\"|\"|"
41+
char_str = r"\\'|'|"
3542
extern_str = "(?P<ext_decl>extern \"C(\+\+)?\")\s+(?P<ext_brace>\{)?|"
3643
braces_str = "\{|\}|;|def\s+\w+|\w+\s+\w+\s*(?P<paren>\(\))"
3744
postfix_str = "(?=\s*(;|\{))"
3845
token_regex = regex.compile(
39-
comment_str + extern_str + braces_str + postfix_str)
46+
preproc_str + comment_str + string_str + char_str + extern_str +
47+
braces_str + postfix_str)
4048

4149
EXTRA_POP_OFFSET = 2
4250

@@ -48,70 +56,115 @@ def run_pipeline(self, config_file, name, lines):
4856
# is_c + pop offset == 3: C++ lang restore that needs extra brace pop
4957
extern_brace_indices = [is_c]
5058

59+
in_preproc_else = False
5160
in_multicomment = False
52-
in_comment = False
61+
in_singlecomment = False
62+
in_string = False
63+
in_char = False
5364
for match in token_regex.finditer(lines):
5465
token = match.group()
5566

67+
# Skip #else to #endif in case they have braces in them. This
68+
# assumes preprocessor directives are only used for conditional
69+
# compilation for different platforms and have the same amount of
70+
# braces in both branches. Nested preprocessor directives are also
71+
# not handled.
72+
if token == "#else":
73+
in_preproc_else = True
74+
elif token == "#endif":
75+
in_preproc_else = False
76+
if in_preproc_else:
77+
continue
78+
5679
if token == "/*":
57-
in_multicomment = True
80+
if not in_singlecomment and not in_string and not in_char:
81+
in_multicomment = True
5882
elif token == "*/":
59-
in_multicomment = False
60-
in_comment = False
83+
if not in_singlecomment and not in_string and not in_char:
84+
in_multicomment = False
6185
elif token == "//":
62-
print("into comment")
63-
in_comment = True
86+
if not in_multicomment and not in_string and not in_char:
87+
in_singlecomment = True
6488
elif token == linesep:
65-
print("out of comment")
66-
in_comment = False
67-
elif not in_multicomment and not in_comment:
68-
if token == "{":
89+
if not in_multicomment:
90+
in_singlecomment = False
91+
elif in_multicomment or in_singlecomment:
92+
# Tokens processed after this branch are ignored if they are in
93+
# comments
94+
continue
95+
elif token == "\\\"":
96+
continue
97+
elif token == "\"":
98+
if not in_char:
99+
in_string = not in_string
100+
elif token == "\\'":
101+
continue
102+
elif token == "'":
103+
if not in_string:
104+
in_char = not in_char
105+
elif in_string or in_char:
106+
# Tokens processed after this branch are ignored if they are in
107+
# double or single quotes
108+
continue
109+
elif token == "{":
110+
extern_brace_indices.append(is_c)
111+
elif token == "}":
112+
is_c = extern_brace_indices.pop()
113+
114+
if len(extern_brace_indices) == 0:
115+
self.__print_failure(name)
116+
return (lines, False, False)
117+
118+
# If the next stack frame is from an extern without braces, pop
119+
# it.
120+
if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
121+
is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
122+
extern_brace_indices.pop()
123+
elif token == ";":
124+
if len(extern_brace_indices) == 0:
125+
self.__print_failure(name)
126+
return (lines, False, False)
127+
128+
# If the next stack frame is from an extern without braces, pop
129+
# it.
130+
if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
131+
is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
132+
extern_brace_indices.pop()
133+
elif token.startswith("extern"):
134+
# Back up language setting first
135+
if match.group("ext_brace"):
69136
extern_brace_indices.append(is_c)
70-
elif token == "}":
71-
is_c = extern_brace_indices.pop()
72-
73-
# If the next stack frame is from an extern without braces,
74-
# pop it.
75-
if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
76-
is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
77-
extern_brace_indices.pop()
78-
elif token == ";":
79-
# If the next stack frame is from an extern without braces,
80-
# pop it.
81-
if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
82-
is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
83-
extern_brace_indices.pop()
84-
elif token.startswith("extern"):
85-
# Back up language setting first
86-
if match.group("ext_brace"):
87-
extern_brace_indices.append(is_c)
88-
else:
89-
# Handling an extern without braces changing the
90-
# language type is done by treating it as a pseudo-brace
91-
# that gets popped as well when the next "}" or ";" is
92-
# encountered. The "extra pop" offset is used as a flag
93-
# on the top stack value that is checked whenever a pop
94-
# is performed.
95-
extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)
96-
97-
# Change language based on extern declaration
98-
if match.group("ext_decl") == "extern \"C\"":
99-
is_c = True
100-
else:
101-
is_c = False
102-
elif match.group(
103-
"paren") and "return " not in match.group() and is_c:
104-
# Replaces () with (void)
105-
output += lines[pos:match.span("paren")[0]] + "(void)"
106-
pos = match.span("paren")[0] + len("()")
107-
108-
file_changed = True
137+
else:
138+
# Handling an extern without braces changing the language
139+
# type is done by treating it as a pseudo-brace that gets
140+
# popped as well when the next "}" or ";" is encountered.
141+
# The "extra pop" offset is used as a flag on the top stack
142+
# value that is checked whenever a pop is performed.
143+
extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)
144+
145+
# Change language based on extern declaration
146+
if match.group("ext_decl") == "extern \"C\"":
147+
is_c = True
148+
else:
149+
is_c = False
150+
elif match.group(
151+
"paren") and "return " not in match.group() and is_c:
152+
# Replaces () with (void)
153+
output += lines[pos:match.span("paren")[0]] + "(void)"
154+
pos = match.span("paren")[0] + len("()")
155+
156+
file_changed = True
109157

110158
# Write rest of file if it wasn't all processed
111159
if pos < len(lines):
112160
output += lines[pos:]
113161

162+
# Invariant: extern_brace_indices has one entry
163+
success = len(extern_brace_indices) == 1
164+
if not success:
165+
self.__print_failure(name)
166+
114167
if file_changed:
115-
return (output, file_changed, True)
168+
return (output, file_changed, success)
116169
else:
117-
return (lines, file_changed, True)
170+
return (lines, file_changed, success)

wpiformat/wpiformat/usingdeclaration.py

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,57 @@ def run_pipeline(self, config_file, name, lines):
1717
# Tokenize file as brace opens, brace closes, and "using" declarations.
1818
# "using" declarations are scoped, so content inside any bracket pair is
1919
# considered outside the global namespace.
20-
token_regex = regex.compile(
21-
"/\*|\*/|//|" + linesep + "|\{|\}|using\s[^;]*;")
20+
token_regex = regex.compile(r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" +
21+
linesep + "|\{|\}|using\s[^;]*;")
2222

2323
brace_count = 0
2424
in_multicomment = False
25-
in_comment = False
25+
in_singlecomment = False
26+
in_string = False
27+
in_char = False
2628
for match in token_regex.finditer(lines):
2729
token = match.group()
2830

2931
if token == "/*":
30-
in_multicomment = True
32+
if not in_singlecomment and not in_string and not in_char:
33+
in_multicomment = True
3134
elif token == "*/":
32-
in_multicomment = False
33-
in_comment = False
35+
if not in_singlecomment and not in_string and not in_char:
36+
in_multicomment = False
3437
elif token == "//":
35-
in_comment = True
38+
if not in_multicomment and not in_string and not in_char:
39+
in_singlecomment = True
3640
elif token == linesep:
37-
in_comment = False
38-
elif not in_multicomment and not in_comment:
39-
if token == "{":
40-
brace_count += 1
41-
elif token == "}":
42-
brace_count -= 1
43-
elif token.startswith("using"):
44-
if brace_count == 0:
45-
linenum = lines.count(linesep, 0, match.start()) + 1
46-
if "NOLINT" not in lines.splitlines()[linenum - 1]:
47-
format_succeeded = False
48-
print(name + ": " + str(linenum) + ": '" + token + \
49-
"' in global namespace")
41+
if not in_multicomment:
42+
in_singlecomment = False
43+
elif in_multicomment or in_singlecomment:
44+
# Tokens processed after this branch are ignored if they are in
45+
# comments
46+
continue
47+
elif token == "\\\"":
48+
continue
49+
elif token == "\"":
50+
if not in_char:
51+
in_string = not in_string
52+
elif token == "\\'":
53+
continue
54+
elif token == "'":
55+
if not in_string:
56+
in_char = not in_char
57+
elif in_string or in_char:
58+
# Tokens processed after this branch are ignored if they are in
59+
# double or single quotes
60+
continue
61+
elif token == "{":
62+
brace_count += 1
63+
elif token == "}":
64+
brace_count -= 1
65+
elif token.startswith("using"):
66+
if brace_count == 0:
67+
linenum = lines.count(linesep, 0, match.start()) + 1
68+
if "NOLINT" not in lines.splitlines()[linenum - 1]:
69+
format_succeeded = False
70+
print(name + ": " + str(linenum) + ": '" + token + \
71+
"' in global namespace")
5072

5173
return (lines, False, format_succeeded)

0 commit comments

Comments
 (0)