Skip to content

Commit de52136

Browse files
committed
Removed support for c-style and embedded comments
1 parent de70108 commit de52136

File tree

7 files changed

+39
-127
lines changed

7 files changed

+39
-127
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
``AutoCompleter`` which has since developed a dependency on ``cmd2`` methods.
1212
* Removed ability to call commands in ``pyscript`` as if they were functions (e.g. ``app.help()``) in favor
1313
of only supporting one ``pyscript`` interface. This simplifies future maintenance.
14+
* No longer supporting C-style comments. Hash (#) is the only valid comment marker.
15+
* No longer supporting comments embedded in a command. Only strings where the first non-whitespace character
16+
is a # will be treated as comments. All other # characters will be treated as literals.
17+
* \# this is a comment
18+
* this # is not a comment
1419

1520
## 0.9.10 (February 22, 2019)
1621
* Bug Fixes

cmd2/cmd2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def parse_quoted_string(string: str, preserve_quotes: bool) -> List[str]:
160160
lexed_arglist = string
161161
else:
162162
# Use shlex to split the command line into a list of arguments based on shell rules
163-
lexed_arglist = shlex.split(string, posix=False)
163+
lexed_arglist = shlex.split(string, comments=False, posix=False)
164164

165165
if not preserve_quotes:
166166
lexed_arglist = [utils.strip_quotes(arg) for arg in lexed_arglist]
@@ -761,7 +761,7 @@ def tokens_for_completion(self, line: str, begidx: int, endidx: int) -> Tuple[Li
761761
while True:
762762
try:
763763
# Use non-POSIX parsing to keep the quotes around the tokens
764-
initial_tokens = shlex.split(tmp_line[:tmp_endidx], posix=False)
764+
initial_tokens = shlex.split(tmp_line[:tmp_endidx], comments=False, posix=False)
765765

766766
# If the cursor is at an empty token outside of a quoted string,
767767
# then that is the token being completed. Add it to the list.

cmd2/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
REDIRECTION_APPEND = '>>'
1313
REDIRECTION_CHARS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT]
1414
REDIRECTION_TOKENS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT, REDIRECTION_APPEND]
15+
COMMENT_CHAR = '#'
1516

1617
# Regular expression to match ANSI escape codes
1718
ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m')

cmd2/parsing.py

Lines changed: 10 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -236,33 +236,6 @@ def __init__(
236236
else:
237237
self.shortcuts = shortcuts
238238

239-
# this regular expression matches C-style comments and quoted
240-
# strings, i.e. stuff between single or double quote marks
241-
# it's used with _comment_replacer() to strip out the C-style
242-
# comments, while leaving C-style comments that are inside either
243-
# double or single quotes.
244-
#
245-
# this big regular expression can be broken down into 3 regular
246-
# expressions that are OR'ed together with a pipe character
247-
#
248-
# /\*.*\*/ Matches C-style comments (i.e. /* comment */)
249-
# does not match unclosed comments.
250-
# \'(?:\\.|[^\\\'])*\' Matches a single quoted string, allowing
251-
# for embedded backslash escaped single quote
252-
# marks.
253-
# "(?:\\.|[^\\"])*" Matches a double quoted string, allowing
254-
# for embedded backslash escaped double quote
255-
# marks.
256-
#
257-
# by way of reminder the (?:...) regular expression syntax is just
258-
# a non-capturing version of regular parenthesis. We need the non-
259-
# capturing syntax because _comment_replacer() looks at match
260-
# groups
261-
self.comment_pattern = re.compile(
262-
r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
263-
re.DOTALL | re.MULTILINE
264-
)
265-
266239
# commands have to be a word, so make a regular expression
267240
# that matches the first word in the line. This regex has three
268241
# parts:
@@ -315,6 +288,9 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
315288
if not word:
316289
return False, 'cannot be an empty string'
317290

291+
if word.startswith(constants.COMMENT_CHAR):
292+
return False, 'cannot start with the comment character'
293+
318294
for (shortcut, _) in self.shortcuts:
319295
if word.startswith(shortcut):
320296
# Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
338314
def tokenize(self, line: str) -> List[str]:
339315
"""Lex a string into a list of tokens.
340316
341-
Comments are removed, and shortcuts and aliases are expanded.
317+
Shortcuts and aliases are expanded and comments are removed.
342318
343319
Raises ValueError if there are unclosed quotation marks.
344320
"""
345321

346-
# strip C-style comments
347-
# shlex will handle the python/shell style comments for us
348-
line = re.sub(self.comment_pattern, self._comment_replacer, line)
349-
350322
# expand shortcuts and aliases
351323
line = self._expand(line)
352324

325+
# check if this line is a comment
326+
if line.strip().startswith(constants.COMMENT_CHAR):
327+
return []
328+
353329
# split on whitespace
354-
lexer = shlex.shlex(line, posix=False)
355-
lexer.whitespace_split = True
330+
tokens = shlex.split(line, comments=False, posix=False)
356331

357332
# custom lexing
358-
tokens = self._split_on_punctuation(list(lexer))
333+
tokens = self._split_on_punctuation(tokens)
359334
return tokens
360335

361336
def parse(self, line: str) -> Statement:
@@ -610,15 +585,6 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
610585

611586
return command, args
612587

613-
@staticmethod
614-
def _comment_replacer(match):
615-
matched_string = match.group(0)
616-
if matched_string.startswith('/'):
617-
# the matched string was a comment, so remove it
618-
return ''
619-
# the matched string was a quoted string, return the match
620-
return matched_string
621-
622588
def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
623589
"""Further splits tokens from a command line using punctuation characters
624590

tests/test_argparse.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,6 @@ def test_argparse_with_list_and_empty_doc(argparse_app):
141141
out = run_cmd(argparse_app, 'speak -s hello world!')
142142
assert out == ['HELLO WORLD!']
143143

144-
def test_argparse_comment_stripping(argparse_app):
145-
out = run_cmd(argparse_app, 'speak it was /* not */ delicious! # Yuck!')
146-
assert out == ['it was delicious!']
147-
148144
def test_argparser_correct_args_with_quotes_and_midline_options(argparse_app):
149145
out = run_cmd(argparse_app, "speak 'This is a' -s test of the emergency broadcast system!")
150146
assert out == ['THIS IS A TEST OF THE EMERGENCY BROADCAST SYSTEM!']

tests/test_cmd2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
from unittest import mock
2525

2626
import cmd2
27-
from cmd2 import clipboard
28-
from cmd2 import utils
27+
from cmd2 import clipboard, constants, utils
2928
from .conftest import run_cmd, normalize, BASE_HELP, BASE_HELP_VERBOSE, \
3029
HELP_HISTORY, SHORTCUTS_TXT, SHOW_TXT, SHOW_LONG
3130

@@ -1828,6 +1827,7 @@ def test_poutput_color_never(base_app):
18281827
# These are invalid names for aliases and macros
18291828
invalid_command_name = [
18301829
'""', # Blank name
1830+
constants.COMMENT_CHAR,
18311831
'!no_shortcut',
18321832
'">"',
18331833
'"no>pe"',

tests/test_parsing.py

Lines changed: 19 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import cmd2
1313
from cmd2.parsing import StatementParser
14-
from cmd2 import utils
14+
from cmd2 import constants, utils
1515

1616
@pytest.fixture
1717
def parser():
@@ -70,8 +70,8 @@ def test_parse_empty_string_default(default_parser):
7070

7171
@pytest.mark.parametrize('line,tokens', [
7272
('command', ['command']),
73-
('command /* with some comment */ arg', ['command', 'arg']),
74-
('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']),
73+
(constants.COMMENT_CHAR + 'comment', []),
74+
('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']),
7575
('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
7676
('termbare; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
7777
('termbare & > /tmp/output', ['termbare', '&', '>', '/tmp/output']),
@@ -84,8 +84,8 @@ def test_tokenize_default(default_parser, line, tokens):
8484

8585
@pytest.mark.parametrize('line,tokens', [
8686
('command', ['command']),
87-
('command /* with some comment */ arg', ['command', 'arg']),
88-
('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']),
87+
('# comment', []),
88+
('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']),
8989
('42 arg1 arg2', ['theanswer', 'arg1', 'arg2']),
9090
('l', ['shell', 'ls', '-al']),
9191
('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
@@ -193,59 +193,23 @@ def test_parse_command_with_args_terminator_and_suffix(parser):
193193
assert statement.terminator == ';'
194194
assert statement.suffix == 'and suffix'
195195

196-
def test_parse_hashcomment(parser):
197-
statement = parser.parse('hi # this is all a comment')
198-
assert statement.command == 'hi'
199-
assert statement == ''
200-
assert statement.args == statement
201-
assert statement.argv == ['hi']
202-
assert not statement.arg_list
203-
204-
def test_parse_c_comment(parser):
205-
statement = parser.parse('hi /* this is | all a comment */')
206-
assert statement.command == 'hi'
207-
assert statement == ''
208-
assert statement.args == statement
209-
assert statement.argv == ['hi']
210-
assert not statement.arg_list
211-
assert not statement.pipe_to
212-
213-
def test_parse_c_comment_empty(parser):
214-
statement = parser.parse('/* this is | all a comment */')
196+
def test_parse_comment(parser):
197+
statement = parser.parse(constants.COMMENT_CHAR + ' this is all a comment')
215198
assert statement.command == ''
199+
assert statement == ''
216200
assert statement.args == statement
217-
assert not statement.pipe_to
218201
assert not statement.argv
219202
assert not statement.arg_list
220-
assert statement == ''
221203

222-
def test_parse_c_comment_no_closing(parser):
223-
statement = parser.parse('cat /tmp/*.txt')
224-
assert statement.command == 'cat'
225-
assert statement == '/tmp/*.txt'
226-
assert statement.args == statement
227-
assert not statement.pipe_to
228-
assert statement.argv == ['cat', '/tmp/*.txt']
229-
assert statement.arg_list == statement.argv[1:]
230-
231-
def test_parse_c_comment_multiple_opening(parser):
232-
statement = parser.parse('cat /tmp/*.txt /tmp/*.cfg')
233-
assert statement.command == 'cat'
234-
assert statement == '/tmp/*.txt /tmp/*.cfg'
204+
def test_parse_embedded_comment_char(parser):
205+
command_str = 'hi ' + constants.COMMENT_CHAR + ' not a comment'
206+
statement = parser.parse(command_str)
207+
assert statement.command == 'hi'
208+
assert statement == constants.COMMENT_CHAR + ' not a comment'
235209
assert statement.args == statement
236-
assert not statement.pipe_to
237-
assert statement.argv == ['cat', '/tmp/*.txt', '/tmp/*.cfg']
210+
assert statement.argv == command_str.split()
238211
assert statement.arg_list == statement.argv[1:]
239212

240-
def test_parse_what_if_quoted_strings_seem_to_start_comments(parser):
241-
statement = parser.parse('what if "quoted strings /* seem to " start comments?')
242-
assert statement.command == 'what'
243-
assert statement == 'if "quoted strings /* seem to " start comments?'
244-
assert statement.args == statement
245-
assert statement.argv == ['what', 'if', 'quoted strings /* seem to ', 'start', 'comments?']
246-
assert statement.arg_list == ['if', '"quoted strings /* seem to "', 'start', 'comments?']
247-
assert not statement.pipe_to
248-
249213
@pytest.mark.parametrize('line',[
250214
'simple | piped',
251215
'simple|piped',
@@ -411,30 +375,6 @@ def test_parse_multiline_command_ignores_redirectors_within_it(parser, line, ter
411375
assert statement.arg_list == statement.argv[1:]
412376
assert statement.terminator == terminator
413377

414-
def test_parse_multiline_with_incomplete_comment(parser):
415-
"""A terminator within a comment will be ignored and won't terminate a multiline command.
416-
Un-closed comments effectively comment out everything after the start."""
417-
line = 'multiline command /* with unclosed comment;'
418-
statement = parser.parse(line)
419-
assert statement.multiline_command == 'multiline'
420-
assert statement.command == 'multiline'
421-
assert statement == 'command /* with unclosed comment'
422-
assert statement.args == statement
423-
assert statement.argv == ['multiline', 'command', '/*', 'with', 'unclosed', 'comment']
424-
assert statement.arg_list == statement.argv[1:]
425-
assert statement.terminator == ';'
426-
427-
def test_parse_multiline_with_complete_comment(parser):
428-
line = 'multiline command /* with comment complete */ is done;'
429-
statement = parser.parse(line)
430-
assert statement.multiline_command == 'multiline'
431-
assert statement.command == 'multiline'
432-
assert statement == 'command is done'
433-
assert statement.args == statement
434-
assert statement.argv == ['multiline', 'command', 'is', 'done']
435-
assert statement.arg_list == statement.argv[1:]
436-
assert statement.terminator == ';'
437-
438378
def test_parse_multiline_terminated_by_empty_line(parser):
439379
line = 'multiline command ends\n\n'
440380
statement = parser.parse(line)
@@ -464,7 +404,7 @@ def test_parse_multiline_with_embedded_newline(parser, line, terminator):
464404
assert statement.arg_list == ['command', '"with\nembedded newline"']
465405
assert statement.terminator == terminator
466406

467-
def test_parse_multiline_ignores_terminators_in_comments(parser):
407+
def test_parse_multiline_ignores_terminators_in_quotes(parser):
468408
line = 'multiline command "with term; ends" now\n\n'
469409
statement = parser.parse(line)
470410
assert statement.multiline_command == 'multiline'
@@ -762,6 +702,10 @@ def test_is_valid_command_invalid(parser):
762702
valid, errmsg = parser.is_valid_command('')
763703
assert not valid and 'cannot be an empty string' in errmsg
764704

705+
# Starts with the comment character
706+
valid, errmsg = parser.is_valid_command(constants.COMMENT_CHAR)
707+
assert not valid and 'cannot start with the comment character' in errmsg
708+
765709
# Starts with shortcut
766710
valid, errmsg = parser.is_valid_command('!ls')
767711
assert not valid and 'cannot start with a shortcut' in errmsg

0 commit comments

Comments
 (0)