@@ -81,7 +81,7 @@ def command_and_args(self):
8181 return rtn
8282
8383
84- class StatementParser () :
84+ class StatementParser :
8585 """Parse raw text into command components.
8686
8787 Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
@@ -93,7 +93,7 @@ def __init__(
9393 multiline_commands = None ,
9494 aliases = None ,
9595 shortcuts = None ,
96- ):
96+ ):
9797 self .allow_redirection = allow_redirection
9898 if terminators is None :
9999 self .terminators = [';' ]
@@ -169,6 +169,8 @@ def tokenize(self, line: str) -> List[str]:
169169 """Lex a string into a list of tokens.
170170
171171 Comments are removed, and shortcuts and aliases are expanded.
172+
173+ Raises ValueError if there are unclosed quotation marks.
172174 """
173175
174176 # strip C-style comments
@@ -190,6 +192,8 @@ def parse(self, rawinput: str) -> Statement:
190192 """Tokenize the input and parse it into a Statement object, stripping
191193 comments, expanding aliases and shortcuts, and extracting output
192194 redirection directives.
195+
196+ Raises ValueError if there are unclosed quotation marks.
193197 """
194198
195199 # handle the special case/hardcoded terminator of a blank line
@@ -310,16 +314,40 @@ def parse(self, rawinput: str) -> Statement:
310314 return statement
311315
312316 def parse_command_only (self , rawinput : str ) -> Statement :
313- """Partially parse input into a Statement object. The command is
314- identified, and shortcuts and aliases are expanded.
317+ """Partially parse input into a Statement object.
318+
319+ The command is identified, and shortcuts and aliases are expanded.
315320 Terminators, multiline commands, and output redirection are not
316321 parsed.
322+
323+ This method is used by tab completion code and therefore must not
324+ generate an exception if there are unclosed quotes.
325+
326+ The Statement object returned by this method can contain values
327+ in at most the following attributes:
328+ - raw
329+ - command
330+ - args
331+
332+ Unlike parse(), this method does not remove redundant whitespace
333+ within statement.args. It does, however, ensure that args has no leading
334+ or trailing whitespace.
317335 """
318- # lex the input into a list of tokens
319- tokens = self .tokenize (rawinput )
336+ # expand shortcuts and aliases
337+ line = self ._expand (rawinput )
320338
321- # parse out the command and everything else
322- (command , args ) = self ._command_and_args (tokens )
339+ command = None
340+ args = None
341+ match = self .command_pattern .search (line )
342+ if match :
343+ # we got a match, extract the command
344+ command = match .group (1 )
345+ # the command_pattern regex is designed to match the spaces
346+ # between command and args with a second match group. Using
347+ # the end of the second match group ensures that args has
348+ # no leading whitespace. The rstrip() makes sure there is
349+ # no trailing whitespace
350+ args = line [match .end (2 ):].rstrip ()
323351
324352 # build the statement
325353 # string representation of args must be an empty string instead of
@@ -328,7 +356,6 @@ def parse_command_only(self, rawinput: str) -> Statement:
328356 statement .raw = rawinput
329357 statement .command = command
330358 statement .args = args
331- statement .argv = tokens
332359 return statement
333360
334361 def _expand (self , line : str ) -> str :
@@ -355,7 +382,7 @@ def _expand(self, line: str) -> str:
355382
356383 # expand shortcuts
357384 for (shortcut , expansion ) in self .shortcuts :
358- if line .startswith (shortcut ):
385+ if line .startswith (shortcut ):
359386 # If the next character after the shortcut isn't a space, then insert one
360387 shortcut_len = len (shortcut )
361388 if len (line ) == shortcut_len or line [shortcut_len ] != ' ' :
@@ -383,7 +410,7 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
383410 if len (tokens ) > 1 :
384411 args = ' ' .join (tokens [1 :])
385412
386- return ( command , args )
413+ return command , args
387414
388415 @staticmethod
389416 def _comment_replacer (match ):
@@ -400,7 +427,7 @@ def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
400427 # as word breaks when they are in unquoted strings. Each run of punctuation
401428 # characters is treated as a single token.
402429
403- :param initial_tokens : the tokens as parsed by shlex
430+ :param tokens : the tokens as parsed by shlex
404431 :return: the punctuated tokens
405432 """
406433 punctuation = []
0 commit comments