Merge branch 'master' into ignore_identchars

kotfu · kotfu · commit bc3c31f182dd · 2018-05-06T09:49:27.000-06:00
# Conflicts:
#	cmd2/parsing.py
#	tests/test_parsing.py
diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py
@@ -1590,12 +1590,9 @@ def complete(self, text, state):
             if begidx > 0:
 
                 # Parse the command line
-                command, args, expanded_line = self.parseline(line)
-
-                # use these lines instead of the one above
-                # statement = self.command_parser.parse_command_only(line)
-                # command = statement.command
-                # expanded_line = statement.command_and_args
+                statement = self.statement_parser.parse_command_only(line)
+                command = statement.command
+                expanded_line = statement.command_and_args
 
                 # We overwrote line with a properly formatted but fully stripped version
                 # Restore the end spaces since line is only supposed to be lstripped when
@@ -1616,8 +1613,7 @@ def complete(self, text, state):
                 tokens, raw_tokens = self.tokens_for_completion(line, begidx, endidx)
 
                 # Either had a parsing error or are trying to complete the command token
-                # The latter can happen if default_to_shell is True and parseline() allowed
-                # assumed something like " or ' was a command.
+                # The latter can happen if " or ' was entered as the command
                 if tokens is None or len(tokens) == 1:
                     self.completion_matches = []
                     return None
@@ -1937,66 +1933,16 @@ def postparsing_postcmd(self, stop: bool) -> bool:
     def parseline(self, line):
         """Parse the line into a command name and a string containing the arguments.
 
-        NOTE: This is an override of a parent class method.  It is only used by other parent class methods.  But
-        we do need to override it here so that the additional shortcuts present in cmd2 get properly expanded for
-        purposes of tab completion.
+        NOTE: This is an override of a parent class method.  It is only used by other parent class methods.
 
-        Used for command tab completion.  Returns a tuple containing (command, args, line).
-        'command' and 'args' may be None if the line couldn't be parsed.
+        Unlike the parent class method, this override ignores self.identchars.
 
         :param line: str - line read by readline
         :return: (str, str, str) - tuple containing (command, args, line)
         """
-        line = line.strip()
-
-        if not line:
-            # Deal with empty line or all whitespace line
-            return None, None, line
-
-        # Make a copy of aliases so we can edit it
-        tmp_aliases = list(self.aliases.keys())
-        keep_expanding = len(tmp_aliases) > 0
-
-        # Expand aliases
-        while keep_expanding:
-            for cur_alias in tmp_aliases:
-                keep_expanding = False
-
-                if line == cur_alias or line.startswith(cur_alias + ' '):
-                    line = line.replace(cur_alias, self.aliases[cur_alias], 1)
-
-                    # Do not expand the same alias more than once
-                    tmp_aliases.remove(cur_alias)
-                    keep_expanding = len(tmp_aliases) > 0
-                    break
-
-        # Expand command shortcut to its full command name
-        for (shortcut, expansion) in self.shortcuts:
-            if line.startswith(shortcut):
-                # If the next character after the shortcut isn't a space, then insert one
-                shortcut_len = len(shortcut)
-                if len(line) == shortcut_len or line[shortcut_len] != ' ':
-                    expansion += ' '
-
-                # Expand the shortcut
-                line = line.replace(shortcut, expansion, 1)
-                break
-
-        i, n = 0, len(line)
-
-        # If we are allowing shell commands, then allow any character in the command
-        if self.default_to_shell:
-            while i < n and line[i] != ' ':
-                i += 1
-
-        # Otherwise only allow those in identchars
-        else:
-            while i < n and line[i] in self.identchars:
-                i += 1
-
-        command, arg = line[:i], line[i:].strip()
 
-        return command, arg, line
+        statement = self.statement_parser.parse_command_only(line)
+        return statement.command, statement.args, statement.command_and_args
 
     def onecmd_plus_hooks(self, line):
         """Top-level function called by cmdloop() to handle parsing a line and running the command and all of its hooks.
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
@@ -81,7 +81,7 @@ def command_and_args(self):
         return rtn
 
 
-class StatementParser():
+class StatementParser:
     """Parse raw text into command components.
 
     Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
@@ -93,7 +93,7 @@ def __init__(
             multiline_commands=None,
             aliases=None,
             shortcuts=None,
-        ):
+    ):
         self.allow_redirection = allow_redirection
         if terminators is None:
             self.terminators = [';']
@@ -169,6 +169,8 @@ def tokenize(self, line: str) -> List[str]:
         """Lex a string into a list of tokens.
 
         Comments are removed, and shortcuts and aliases are expanded.
+
+        Raises ValueError if there are unclosed quotation marks.
         """
 
         # strip C-style comments
@@ -190,6 +192,8 @@ def parse(self, rawinput: str) -> Statement:
         """Tokenize the input and parse it into a Statement object, stripping
         comments, expanding aliases and shortcuts, and extracting output
         redirection directives.
+
+        Raises ValueError if there are unclosed quotation marks.
         """
 
         # handle the special case/hardcoded terminator of a blank line
@@ -310,16 +314,40 @@ def parse(self, rawinput: str) -> Statement:
         return statement
 
     def parse_command_only(self, rawinput: str) -> Statement:
-        """Partially parse input into a Statement object. The command is
-        identified, and shortcuts and aliases are expanded.
+        """Partially parse input into a Statement object.
+
+        The command is identified, and shortcuts and aliases are expanded.
         Terminators, multiline commands, and output redirection are not
         parsed.
+
+        This method is used by tab completion code and therefore must not
+        generate an exception if there are unclosed quotes.
+
+        The Statement object returned by this method can at most contain
+        values in the following attributes:
+          - raw
+          - command
+          - args
+
+        Unlike parse(), this method does not remove redundant whitespace
+        within statement.args. It does, however, ensure args does not have leading
+        or trailing whitespace.
         """
-        # lex the input into a list of tokens
-        tokens = self.tokenize(rawinput)
+        # expand shortcuts and aliases
+        line = self._expand(rawinput)
 
-        # parse out the command and everything else
-        (command, args) = self._command_and_args(tokens)
+        command = None
+        args = None
+        match = self.command_pattern.search(line)
+        if match:
+            # we got a match, extract the command
+            command = match.group(1)
+            # the command_pattern regex is designed to match the spaces
+            # between command and args with a second match group. Using
+            # the end of the second match group ensures that args has
+            # no leading whitespace. The rstrip() makes sure there is
+            # no trailing whitespace
+            args = line[match.end(2):].rstrip()
 
         # build the statement
         # string representation of args must be an empty string instead of
@@ -328,7 +356,6 @@ def parse_command_only(self, rawinput: str) -> Statement:
         statement.raw = rawinput
         statement.command = command
         statement.args = args
-        statement.argv = tokens
         return statement
 
     def _expand(self, line: str) -> str:
@@ -355,7 +382,7 @@ def _expand(self, line: str) -> str:
 
         # expand shortcuts
         for (shortcut, expansion) in self.shortcuts:
-            if  line.startswith(shortcut):
+            if line.startswith(shortcut):
                 # If the next character after the shortcut isn't a space, then insert one
                 shortcut_len = len(shortcut)
                 if len(line) == shortcut_len or line[shortcut_len] != ' ':
@@ -383,7 +410,7 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
         if len(tokens) > 1:
             args = ' '.join(tokens[1:])
 
-        return (command, args)
+        return command, args
 
     @staticmethod
     def _comment_replacer(match):
@@ -400,7 +427,7 @@ def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
         # as word breaks when they are in unquoted strings. Each run of punctuation
         # characters is treated as a single token.
 
-        :param initial_tokens: the tokens as parsed by shlex
+        :param tokens: the tokens as parsed by shlex
         :return: the punctuated tokens
         """
         punctuation = []
diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py
@@ -1723,3 +1723,21 @@ def test_ppaged(base_app):
     base_app.ppaged(msg)
     out = base_app.stdout.buffer
     assert out == msg + end
+
+# We override cmd.parseline() so that parent-class methods we don't override
+# still get consistent command parsing. There is no need to test all of the
+# parsing logic here, because parseline() just delegates to
+# StatementParser.parse_command_only().
+def test_parseline_empty(base_app):
+    statement = ''
+    command, args, line = base_app.parseline(statement)
+    assert not command
+    assert not args
+    assert not line
+
+def test_parseline(base_app):
+    statement = " command with 'partially completed quotes  "
+    command, args, line = base_app.parseline(statement)
+    assert command == 'command'
+    assert args == "with 'partially completed quotes"
+    assert line == statement.strip()
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
@@ -44,6 +44,10 @@ def test_tokenize(parser, line, tokens):
     tokens_to_test = parser.tokenize(line)
     assert tokens_to_test == tokens
 
+def test_tokenize_unclosed_quotes(parser):
+    with pytest.raises(ValueError):
+        tokens = parser.tokenize('command with "unclosed quotes')
+
 @pytest.mark.parametrize('tokens,command,args', [
     ([], None, None),
     (['command'], 'command', None),
@@ -219,7 +223,7 @@ def test_parse_output_to_paste_buffer(parser):
     assert statement.argv == ['output', 'to', 'paste', 'buffer']
     assert statement.output == '>>'
 
-def test_has_redirect_inside_terminator(parser):
+def test_parse_redirect_inside_terminator(parser):
     """The terminator designates the end of the commmand/arguments portion.  If a redirector
     occurs before a terminator, then it will be treated as part of the arguments and not as a redirector."""
     line = 'has > inside;'
@@ -307,6 +311,10 @@ def test_parse_redirect_to_unicode_filename(parser):
     assert statement.output == '>'
     assert statement.output_to == 'café'
 
+def test_parse_unclosed_quotes(parser):
+    with pytest.raises(ValueError):
+        tokens = parser.tokenize("command with 'unclosed quotes")
+
 def test_empty_statement_raises_exception():
     app = cmd2.Cmd()
     with pytest.raises(cmd2.EmptyStatement):
@@ -372,7 +380,6 @@ def test_parse_command_only_command_and_args(parser):
     statement = parser.parse_command_only(line)
     assert statement.command == 'help'
     assert statement.args == 'history'
-    assert statement.argv == ['help', 'history']
     assert statement.command_and_args == line
 
 def test_parse_command_only_emptyline(parser):
@@ -392,22 +399,19 @@ def test_parse_command_only_strips_line(parser):
     statement = parser.parse_command_only(line)
     assert statement.command == 'help'
     assert statement.args == 'history'
-    assert statement.argv == ['help', 'history']
     assert statement.command_and_args == line.strip()
 
 def test_parse_command_only_expands_alias(parser):
     line = 'fake foobar.py'
     statement = parser.parse_command_only(line)
     assert statement.command == 'pyscript'
     assert statement.args == 'foobar.py'
-    assert statement.argv == ['pyscript', 'foobar.py']
 
 def test_parse_command_only_expands_shortcuts(parser):
     line = '!cat foobar.txt'
     statement = parser.parse_command_only(line)
     assert statement.command == 'shell'
     assert statement.args == 'cat foobar.txt'
-    assert statement.argv == ['shell', 'cat', 'foobar.txt']
     assert statement.command_and_args == 'shell cat foobar.txt'
 
 def test_parse_command_only_quoted_args(parser):