Removed support for C-style and embedded comments

kmvanbrunt · kmvanbrunt · commit de5213605d04 · 2019-03-01T18:34:39.000-05:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,11 @@
     ``AutoCompleter`` which has since developed a dependency on ``cmd2`` methods. 
     * Removed ability to call commands in ``pyscript`` as if they were functions (e.g ``app.help()``) in favor
     of only supporting one ``pyscript`` interface. This simplifies future maintenance.
+    * No longer supporting C-style comments. Hash (#) is the only valid comment marker.
+    * No longer supporting comments embedded in a command. Only lines whose first non-whitespace character
+    is a # will be treated as comments. All other # characters will be treated as literals.
+        * \# this is a comment
+        * this # is not a comment
 
 ## 0.9.10 (February 22, 2019)
 * Bug Fixes
diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py
@@ -160,7 +160,7 @@ def parse_quoted_string(string: str, preserve_quotes: bool) -> List[str]:
         lexed_arglist = string
     else:
         # Use shlex to split the command line into a list of arguments based on shell rules
-        lexed_arglist = shlex.split(string, posix=False)
+        lexed_arglist = shlex.split(string, comments=False, posix=False)
 
         if not preserve_quotes:
             lexed_arglist = [utils.strip_quotes(arg) for arg in lexed_arglist]
@@ -761,7 +761,7 @@ def tokens_for_completion(self, line: str, begidx: int, endidx: int) -> Tuple[Li
         while True:
             try:
                 # Use non-POSIX parsing to keep the quotes around the tokens
-                initial_tokens = shlex.split(tmp_line[:tmp_endidx], posix=False)
+                initial_tokens = shlex.split(tmp_line[:tmp_endidx], comments=False, posix=False)
 
                 # If the cursor is at an empty token outside of a quoted string,
                 # then that is the token being completed. Add it to the list.
diff --git a/cmd2/constants.py b/cmd2/constants.py
@@ -12,6 +12,7 @@
 REDIRECTION_APPEND = '>>'
 REDIRECTION_CHARS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT]
 REDIRECTION_TOKENS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT, REDIRECTION_APPEND]
+COMMENT_CHAR = '#'
 
 # Regular expression to match ANSI escape codes
 ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m')
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
@@ -236,33 +236,6 @@ def __init__(
         else:
             self.shortcuts = shortcuts
 
-        # this regular expression matches C-style comments and quoted
-        # strings, i.e. stuff between single or double quote marks
-        # it's used with _comment_replacer() to strip out the C-style
-        # comments, while leaving C-style comments that are inside either
-        # double or single quotes.
-        #
-        # this big regular expression can be broken down into 3 regular
-        # expressions that are OR'ed together with a pipe character
-        #
-        # /\*.*\*/               Matches C-style comments (i.e. /* comment */)
-        #                        does not match unclosed comments.
-        # \'(?:\\.|[^\\\'])*\'   Matches a single quoted string, allowing
-        #                        for embedded backslash escaped single quote
-        #                        marks.
-        # "(?:\\.|[^\\"])*"      Matches a double quoted string, allowing
-        #                        for embedded backslash escaped double quote
-        #                        marks.
-        #
-        # by way of reminder the (?:...) regular expression syntax is just
-        # a non-capturing version of regular parenthesis. We need the non-
-        # capturing syntax because _comment_replacer() looks at match
-        # groups
-        self.comment_pattern = re.compile(
-            r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
-            re.DOTALL | re.MULTILINE
-        )
-
         # commands have to be a word, so make a regular expression
         # that matches the first word in the line. This regex has three
         # parts:
@@ -315,6 +288,9 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
         if not word:
             return False, 'cannot be an empty string'
 
+        if word.startswith(constants.COMMENT_CHAR):
+            return False, 'cannot start with the comment character'
+
         for (shortcut, _) in self.shortcuts:
             if word.startswith(shortcut):
                 # Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
     def tokenize(self, line: str) -> List[str]:
         """Lex a string into a list of tokens.
 
-        Comments are removed, and shortcuts and aliases are expanded.
+        Shortcuts and aliases are expanded and comments are removed.
 
         Raises ValueError if there are unclosed quotation marks.
         """
 
-        # strip C-style comments
-        # shlex will handle the python/shell style comments for us
-        line = re.sub(self.comment_pattern, self._comment_replacer, line)
-
         # expand shortcuts and aliases
         line = self._expand(line)
 
+        # check if this line is a comment
+        if line.strip().startswith(constants.COMMENT_CHAR):
+            return []
+
         # split on whitespace
-        lexer = shlex.shlex(line, posix=False)
-        lexer.whitespace_split = True
+        tokens = shlex.split(line, comments=False, posix=False)
 
         # custom lexing
-        tokens = self._split_on_punctuation(list(lexer))
+        tokens = self._split_on_punctuation(tokens)
         return tokens
 
     def parse(self, line: str) -> Statement:
@@ -610,15 +585,6 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
 
         return command, args
 
-    @staticmethod
-    def _comment_replacer(match):
-        matched_string = match.group(0)
-        if matched_string.startswith('/'):
-            # the matched string was a comment, so remove it
-            return ''
-        # the matched string was a quoted string, return the match
-        return matched_string
-
     def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
         """Further splits tokens from a command line using punctuation characters
 
diff --git a/tests/test_argparse.py b/tests/test_argparse.py
@@ -141,10 +141,6 @@ def test_argparse_with_list_and_empty_doc(argparse_app):
     out = run_cmd(argparse_app, 'speak -s hello world!')
     assert out == ['HELLO WORLD!']
 
-def test_argparse_comment_stripping(argparse_app):
-    out = run_cmd(argparse_app, 'speak it was /* not */ delicious! # Yuck!')
-    assert out == ['it was delicious!']
-
 def test_argparser_correct_args_with_quotes_and_midline_options(argparse_app):
     out = run_cmd(argparse_app, "speak 'This  is a' -s test of the emergency broadcast system!")
     assert out == ['THIS  IS A TEST OF THE EMERGENCY BROADCAST SYSTEM!']
diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py
@@ -24,8 +24,7 @@
     from unittest import mock
 
 import cmd2
-from cmd2 import clipboard
-from cmd2 import utils
+from cmd2 import clipboard, constants, utils
 from .conftest import run_cmd, normalize, BASE_HELP, BASE_HELP_VERBOSE, \
     HELP_HISTORY, SHORTCUTS_TXT, SHOW_TXT, SHOW_LONG
 
@@ -1828,6 +1827,7 @@ def test_poutput_color_never(base_app):
 # These are invalid names for aliases and macros
 invalid_command_name = [
     '""',  # Blank name
+    constants.COMMENT_CHAR,
     '!no_shortcut',
     '">"',
     '"no>pe"',
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
@@ -11,7 +11,7 @@
 
 import cmd2
 from cmd2.parsing import StatementParser
-from cmd2 import utils
+from cmd2 import constants, utils
 
 @pytest.fixture
 def parser():
@@ -70,8 +70,8 @@ def test_parse_empty_string_default(default_parser):
 
 @pytest.mark.parametrize('line,tokens', [
     ('command', ['command']),
-    ('command /* with some comment */ arg', ['command', 'arg']),
-    ('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']),
+    (constants.COMMENT_CHAR + 'comment', []),
+    ('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']),
     ('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
     ('termbare; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
     ('termbare & > /tmp/output', ['termbare', '&', '>', '/tmp/output']),
@@ -84,8 +84,8 @@ def test_tokenize_default(default_parser, line, tokens):
 
 @pytest.mark.parametrize('line,tokens', [
     ('command', ['command']),
-    ('command /* with some comment */ arg', ['command', 'arg']),
-    ('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']),
+    ('# comment', []),
+    ('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']),
     ('42 arg1 arg2', ['theanswer', 'arg1', 'arg2']),
     ('l', ['shell', 'ls', '-al']),
     ('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']),
@@ -193,59 +193,23 @@ def test_parse_command_with_args_terminator_and_suffix(parser):
     assert statement.terminator == ';'
     assert statement.suffix == 'and suffix'
 
-def test_parse_hashcomment(parser):
-    statement = parser.parse('hi # this is all a comment')
-    assert statement.command == 'hi'
-    assert statement == ''
-    assert statement.args == statement
-    assert statement.argv == ['hi']
-    assert not statement.arg_list
-
-def test_parse_c_comment(parser):
-    statement = parser.parse('hi /* this is | all a comment */')
-    assert statement.command == 'hi'
-    assert statement == ''
-    assert statement.args == statement
-    assert statement.argv == ['hi']
-    assert not statement.arg_list
-    assert not statement.pipe_to
-
-def test_parse_c_comment_empty(parser):
-    statement = parser.parse('/* this is | all a comment */')
+def test_parse_comment(parser):
+    statement = parser.parse(constants.COMMENT_CHAR + ' this is all a comment')
     assert statement.command == ''
+    assert statement == ''
     assert statement.args == statement
-    assert not statement.pipe_to
     assert not statement.argv
     assert not statement.arg_list
-    assert statement == ''
 
-def test_parse_c_comment_no_closing(parser):
-    statement = parser.parse('cat /tmp/*.txt')
-    assert statement.command == 'cat'
-    assert statement == '/tmp/*.txt'
-    assert statement.args == statement
-    assert not statement.pipe_to
-    assert statement.argv == ['cat', '/tmp/*.txt']
-    assert statement.arg_list == statement.argv[1:]
-
-def test_parse_c_comment_multiple_opening(parser):
-    statement = parser.parse('cat /tmp/*.txt /tmp/*.cfg')
-    assert statement.command == 'cat'
-    assert statement == '/tmp/*.txt /tmp/*.cfg'
+def test_parse_embedded_comment_char(parser):
+    command_str = 'hi ' + constants.COMMENT_CHAR + ' not a comment'
+    statement = parser.parse(command_str)
+    assert statement.command == 'hi'
+    assert statement == constants.COMMENT_CHAR + ' not a comment'
     assert statement.args == statement
-    assert not statement.pipe_to
-    assert statement.argv == ['cat', '/tmp/*.txt', '/tmp/*.cfg']
+    assert statement.argv == command_str.split()
     assert statement.arg_list == statement.argv[1:]
 
-def test_parse_what_if_quoted_strings_seem_to_start_comments(parser):
-    statement = parser.parse('what if "quoted strings /* seem to " start comments?')
-    assert statement.command == 'what'
-    assert statement == 'if "quoted strings /* seem to " start comments?'
-    assert statement.args == statement
-    assert statement.argv == ['what', 'if', 'quoted strings /* seem to ', 'start', 'comments?']
-    assert statement.arg_list == ['if', '"quoted strings /* seem to "', 'start', 'comments?']
-    assert not statement.pipe_to
-
 @pytest.mark.parametrize('line',[
     'simple | piped',
     'simple|piped',
@@ -411,30 +375,6 @@ def test_parse_multiline_command_ignores_redirectors_within_it(parser, line, ter
     assert statement.arg_list == statement.argv[1:]
     assert statement.terminator == terminator
 
-def test_parse_multiline_with_incomplete_comment(parser):
-    """A terminator within a comment will be ignored and won't terminate a multiline command.
-    Un-closed comments effectively comment out everything after the start."""
-    line = 'multiline command /* with unclosed comment;'
-    statement = parser.parse(line)
-    assert statement.multiline_command == 'multiline'
-    assert statement.command == 'multiline'
-    assert statement == 'command /* with unclosed comment'
-    assert statement.args == statement
-    assert statement.argv == ['multiline', 'command', '/*', 'with', 'unclosed', 'comment']
-    assert statement.arg_list == statement.argv[1:]
-    assert statement.terminator == ';'
-
-def test_parse_multiline_with_complete_comment(parser):
-    line = 'multiline command /* with comment complete */ is done;'
-    statement = parser.parse(line)
-    assert statement.multiline_command == 'multiline'
-    assert statement.command == 'multiline'
-    assert statement == 'command is done'
-    assert statement.args == statement
-    assert statement.argv == ['multiline', 'command', 'is', 'done']
-    assert statement.arg_list == statement.argv[1:]
-    assert statement.terminator == ';'
-
 def test_parse_multiline_terminated_by_empty_line(parser):
     line = 'multiline command ends\n\n'
     statement = parser.parse(line)
@@ -464,7 +404,7 @@ def test_parse_multiline_with_embedded_newline(parser, line, terminator):
     assert statement.arg_list == ['command', '"with\nembedded newline"']
     assert statement.terminator == terminator
 
-def test_parse_multiline_ignores_terminators_in_comments(parser):
+def test_parse_multiline_ignores_terminators_in_quotes(parser):
     line = 'multiline command "with term; ends" now\n\n'
     statement = parser.parse(line)
     assert statement.multiline_command == 'multiline'
@@ -762,6 +702,10 @@ def test_is_valid_command_invalid(parser):
     valid, errmsg = parser.is_valid_command('')
     assert not valid and 'cannot be an empty string' in errmsg
 
+    # Starts with the comment character
+    valid, errmsg = parser.is_valid_command(constants.COMMENT_CHAR)
+    assert not valid and 'cannot start with the comment character' in errmsg
+
     # Starts with shortcut
     valid, errmsg = parser.is_valid_command('!ls')
     assert not valid and 'cannot start with a shortcut' in errmsg