@@ -81,7 +81,7 @@ def command_and_args(self):
8181 return rtn
8282
8383
84- class StatementParser () :
84+ class StatementParser :
8585 """Parse raw text into command components.
8686
8787 Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
@@ -93,7 +93,7 @@ def __init__(
9393 multiline_commands = None ,
9494 aliases = None ,
9595 shortcuts = None ,
96- ):
96+ ):
9797 self .allow_redirection = allow_redirection
9898 if terminators is None :
9999 self .terminators = [';' ]
@@ -144,18 +144,19 @@ def __init__(
144144 # aliases have to be a word, so make a regular expression
145145 # that matches the first word in the line. This regex has two
146146 # parts, the first parenthesis enclosed group matches one
147- # or more non-whitespace characters, and the second group
148- # matches either a whitespace character or the end of the
149- # string. We use \A and \Z to ensure we always match the
150- # beginning and end of a string that may have multiple
151- # lines
152- self .command_pattern = re .compile (r'\A(\S+)(\s|\Z)' )
153-
147+ # or more non-whitespace characters (which may be preceded
148+ # by whitespace) and the second group matches either a whitespace
149+ # character or the end of the string. We use \A and \Z to ensure
150+ # we always match the beginning and end of a string that may have
151+ # multiple lines
152+ self .command_pattern = re .compile (r'\A\s*(\S+)(\s|\Z)+' )
154153
155154 def tokenize (self , line : str ) -> List [str ]:
156155 """Lex a string into a list of tokens.
157156
158157 Comments are removed, and shortcuts and aliases are expanded.
158+
159+ Raises ValueError if there are unclosed quotation marks.
159160 """
160161
161162 # strip C-style comments
@@ -177,6 +178,8 @@ def parse(self, rawinput: str) -> Statement:
177178 """Tokenize the input and parse it into a Statement object, stripping
178179 comments, expanding aliases and shortcuts, and extracting output
179180 redirection directives.
181+
182+ Raises ValueError if there are unclosed quotation marks.
180183 """
181184
182185 # handle the special case/hardcoded terminator of a blank line
@@ -297,16 +300,40 @@ def parse(self, rawinput: str) -> Statement:
297300 return statement
298301
299302 def parse_command_only (self , rawinput : str ) -> Statement :
300- """Partially parse input into a Statement object. The command is
301- identified, and shortcuts and aliases are expanded.
303+ """Partially parse input into a Statement object.
304+
305+ The command is identified, and shortcuts and aliases are expanded.
302306 Terminators, multiline commands, and output redirection are not
303307 parsed.
308+
309+ This method is used by tab completion code and therefore must not
310+ generate an exception if there are unclosed quotes.
311+
312+ The Statement object returned by this method can at most contain
313+ values in the following attributes:
314+ - raw
315+ - command
316+ - args
317+
318+ Unlike parse(), this method does not remove redundant whitespace
319+ within statement.args. It does, however, ensure args does not have leading
320+ or trailing whitespace.
304321 """
305- # lex the input into a list of tokens
306- tokens = self .tokenize (rawinput )
322+ # expand shortcuts and aliases
323+ line = self ._expand (rawinput )
307324
308- # parse out the command and everything else
309- (command , args ) = self ._command_and_args (tokens )
325+ command = None
326+ args = None
327+ match = self .command_pattern .search (line )
328+ if match :
329+ # we got a match, extract the command
330+ command = match .group (1 )
331+ # the command_pattern regex is designed to match the spaces
332+ # between command and args with a second match group. Using
333+ # the end of the second match group ensures that args has
334+ # no leading whitespace. The rstrip() makes sure there is
335+ # no trailing whitespace
336+ args = line [match .end (2 ):].rstrip ()
310337
311338 # build the statement
312339 # string representation of args must be an empty string instead of
@@ -315,7 +342,6 @@ def parse_command_only(self, rawinput: str) -> Statement:
315342 statement .raw = rawinput
316343 statement .command = command
317344 statement .args = args
318- statement .argv = tokens
319345 return statement
320346
321347 def _expand (self , line : str ) -> str :
@@ -342,7 +368,7 @@ def _expand(self, line: str) -> str:
342368
343369 # expand shortcuts
344370 for (shortcut , expansion ) in self .shortcuts :
345- if line .startswith (shortcut ):
371+ if line .startswith (shortcut ):
346372 # If the next character after the shortcut isn't a space, then insert one
347373 shortcut_len = len (shortcut )
348374 if len (line ) == shortcut_len or line [shortcut_len ] != ' ' :
@@ -370,7 +396,7 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
370396 if len (tokens ) > 1 :
371397 args = ' ' .join (tokens [1 :])
372398
373- return ( command , args )
399+ return command , args
374400
375401 @staticmethod
376402 def _comment_replacer (match ):
@@ -387,7 +413,7 @@ def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
387413 # as word breaks when they are in unquoted strings. Each run of punctuation
388414 # characters is treated as a single token.
389415
390- :param initial_tokens : the tokens as parsed by shlex
416+ :param tokens : the tokens as parsed by shlex
391417 :return: the punctuated tokens
392418 """
393419 punctuation = []
0 commit comments