Skip to content

Commit bfdb482

Browse files
authored
Merge pull request #390 from python-cmd2/refactor_parseline
Refactor parseline()
2 parents 7f33f04 + 2f102b9 commit bfdb482

File tree

4 files changed

+95
-101
lines changed

4 files changed

+95
-101
lines changed

cmd2/cmd2.py

Lines changed: 8 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,12 +1577,9 @@ def complete(self, text, state):
15771577
if begidx > 0:
15781578

15791579
# Parse the command line
1580-
command, args, expanded_line = self.parseline(line)
1581-
1582-
# use these lines instead of the one above
1583-
# statement = self.command_parser.parse_command_only(line)
1584-
# command = statement.command
1585-
# expanded_line = statement.command_and_args
1580+
statement = self.statement_parser.parse_command_only(line)
1581+
command = statement.command
1582+
expanded_line = statement.command_and_args
15861583

15871584
# We overwrote line with a properly formatted but fully stripped version
15881585
# Restore the end spaces since line is only supposed to be lstripped when
@@ -1603,8 +1600,7 @@ def complete(self, text, state):
16031600
tokens, raw_tokens = self.tokens_for_completion(line, begidx, endidx)
16041601

16051602
# Either had a parsing error or are trying to complete the command token
1606-
# The latter can happen if default_to_shell is True and parseline() allowed
1607-
# assumed something like " or ' was a command.
1603+
# The latter can happen if " or ' was entered as the command
16081604
if tokens is None or len(tokens) == 1:
16091605
self.completion_matches = []
16101606
return None
@@ -1924,66 +1920,16 @@ def postparsing_postcmd(self, stop: bool) -> bool:
19241920
def parseline(self, line):
19251921
"""Parse the line into a command name and a string containing the arguments.
19261922
1927-
NOTE: This is an override of a parent class method. It is only used by other parent class methods. But
1928-
we do need to override it here so that the additional shortcuts present in cmd2 get properly expanded for
1929-
purposes of tab completion.
1923+
NOTE: This is an override of a parent class method. It is only used by other parent class methods.
19301924
1931-
Used for command tab completion. Returns a tuple containing (command, args, line).
1932-
'command' and 'args' may be None if the line couldn't be parsed.
1925+
Different from the parent class method, this ignores self.identchars.
19331926
19341927
:param line: str - line read by readline
19351928
:return: (str, str, str) - tuple containing (command, args, line)
19361929
"""
1937-
line = line.strip()
1938-
1939-
if not line:
1940-
# Deal with empty line or all whitespace line
1941-
return None, None, line
1942-
1943-
# Make a copy of aliases so we can edit it
1944-
tmp_aliases = list(self.aliases.keys())
1945-
keep_expanding = len(tmp_aliases) > 0
1946-
1947-
# Expand aliases
1948-
while keep_expanding:
1949-
for cur_alias in tmp_aliases:
1950-
keep_expanding = False
1951-
1952-
if line == cur_alias or line.startswith(cur_alias + ' '):
1953-
line = line.replace(cur_alias, self.aliases[cur_alias], 1)
1954-
1955-
# Do not expand the same alias more than once
1956-
tmp_aliases.remove(cur_alias)
1957-
keep_expanding = len(tmp_aliases) > 0
1958-
break
1959-
1960-
# Expand command shortcut to its full command name
1961-
for (shortcut, expansion) in self.shortcuts:
1962-
if line.startswith(shortcut):
1963-
# If the next character after the shortcut isn't a space, then insert one
1964-
shortcut_len = len(shortcut)
1965-
if len(line) == shortcut_len or line[shortcut_len] != ' ':
1966-
expansion += ' '
1967-
1968-
# Expand the shortcut
1969-
line = line.replace(shortcut, expansion, 1)
1970-
break
1971-
1972-
i, n = 0, len(line)
1973-
1974-
# If we are allowing shell commands, then allow any character in the command
1975-
if self.default_to_shell:
1976-
while i < n and line[i] != ' ':
1977-
i += 1
1978-
1979-
# Otherwise only allow those in identchars
1980-
else:
1981-
while i < n and line[i] in self.identchars:
1982-
i += 1
1983-
1984-
command, arg = line[:i], line[i:].strip()
19851930

1986-
return command, arg, line
1931+
statement = self.statement_parser.parse_command_only(line)
1932+
return statement.command, statement.args, statement.command_and_args
19871933

19881934
def onecmd_plus_hooks(self, line):
19891935
"""Top-level function called by cmdloop() to handle parsing a line and running the command and all of its hooks.

cmd2/parsing.py

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def command_and_args(self):
8181
return rtn
8282

8383

84-
class StatementParser():
84+
class StatementParser:
8585
"""Parse raw text into command components.
8686
8787
Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
@@ -93,7 +93,7 @@ def __init__(
9393
multiline_commands=None,
9494
aliases=None,
9595
shortcuts=None,
96-
):
96+
):
9797
self.allow_redirection = allow_redirection
9898
if terminators is None:
9999
self.terminators = [';']
@@ -144,18 +144,19 @@ def __init__(
144144
# aliases have to be a word, so make a regular expression
145145
# that matches the first word in the line. This regex has two
146146
# parts, the first parenthesis enclosed group matches one
147-
# or more non-whitespace characters, and the second group
148-
# matches either a whitespace character or the end of the
149-
# string. We use \A and \Z to ensure we always match the
150-
# beginning and end of a string that may have multiple
151-
# lines
152-
self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)')
153-
147+
# or more non-whitespace characters (which may be preceded
148+
# by whitespace) and the second group matches either a whitespace
149+
# character or the end of the string. We use \A and \Z to ensure
150+
# we always match the beginning and end of a string that may have
151+
# multiple lines
152+
self.command_pattern = re.compile(r'\A\s*(\S+)(\s|\Z)+')
154153

155154
def tokenize(self, line: str) -> List[str]:
156155
"""Lex a string into a list of tokens.
157156
158157
Comments are removed, and shortcuts and aliases are expanded.
158+
159+
Raises ValueError if there are unclosed quotation marks.
159160
"""
160161

161162
# strip C-style comments
@@ -177,6 +178,8 @@ def parse(self, rawinput: str) -> Statement:
177178
"""Tokenize the input and parse it into a Statement object, stripping
178179
comments, expanding aliases and shortcuts, and extracting output
179180
redirection directives.
181+
182+
Raises ValueError if there are unclosed quotation marks.
180183
"""
181184

182185
# handle the special case/hardcoded terminator of a blank line
@@ -297,16 +300,40 @@ def parse(self, rawinput: str) -> Statement:
297300
return statement
298301

299302
def parse_command_only(self, rawinput: str) -> Statement:
300-
"""Partially parse input into a Statement object. The command is
301-
identified, and shortcuts and aliases are expanded.
303+
"""Partially parse input into a Statement object.
304+
305+
The command is identified, and shortcuts and aliases are expanded.
302306
Terminators, multiline commands, and output redirection are not
303307
parsed.
308+
309+
This method is used by tab completion code and therefore must not
310+
generate an exception if there are unclosed quotes.
311+
312+
The Statement object returned by this method can at most contain
313+
values in the following attributes:
314+
- raw
315+
- command
316+
- args
317+
318+
Different from parse(), this method does not remove redundant whitespace
319+
within statement.args. It does, however, ensure args does not have leading
320+
or trailing whitespace.
304321
"""
305-
# lex the input into a list of tokens
306-
tokens = self.tokenize(rawinput)
322+
# expand shortcuts and aliases
323+
line = self._expand(rawinput)
307324

308-
# parse out the command and everything else
309-
(command, args) = self._command_and_args(tokens)
325+
command = None
326+
args = None
327+
match = self.command_pattern.search(line)
328+
if match:
329+
# we got a match, extract the command
330+
command = match.group(1)
331+
# the command_pattern regex is designed to match the spaces
332+
# between command and args with a second match group. Using
333+
# the end of the second match group ensures that args has
334+
# no leading whitespace. The rstrip() makes sure there is
335+
# no trailing whitespace
336+
args = line[match.end(2):].rstrip()
310337

311338
# build the statement
312339
# string representation of args must be an empty string instead of
@@ -315,7 +342,6 @@ def parse_command_only(self, rawinput: str) -> Statement:
315342
statement.raw = rawinput
316343
statement.command = command
317344
statement.args = args
318-
statement.argv = tokens
319345
return statement
320346

321347
def _expand(self, line: str) -> str:
@@ -342,7 +368,7 @@ def _expand(self, line: str) -> str:
342368

343369
# expand shortcuts
344370
for (shortcut, expansion) in self.shortcuts:
345-
if line.startswith(shortcut):
371+
if line.startswith(shortcut):
346372
# If the next character after the shortcut isn't a space, then insert one
347373
shortcut_len = len(shortcut)
348374
if len(line) == shortcut_len or line[shortcut_len] != ' ':
@@ -370,7 +396,7 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
370396
if len(tokens) > 1:
371397
args = ' '.join(tokens[1:])
372398

373-
return (command, args)
399+
return command, args
374400

375401
@staticmethod
376402
def _comment_replacer(match):
@@ -387,7 +413,7 @@ def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
387413
# as word breaks when they are in unquoted strings. Each run of punctuation
388414
# characters is treated as a single token.
389415
390-
:param initial_tokens: the tokens as parsed by shlex
416+
:param tokens: the tokens as parsed by shlex
391417
:return: the punctuated tokens
392418
"""
393419
punctuation = []

tests/test_cmd2.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1718,3 +1718,21 @@ def test_ppaged(base_app):
17181718
base_app.ppaged(msg)
17191719
out = base_app.stdout.buffer
17201720
assert out == msg + end
1721+
1722+
# we override cmd.parseline() so we always get consistent
1723+
# command parsing by parent methods we don't override
1724+
# don't need to test all the parsing logic here, because
1725+
# parseline just calls StatementParser.parse_command_only()
1726+
def test_parseline_empty(base_app):
1727+
statement = ''
1728+
command, args, line = base_app.parseline(statement)
1729+
assert not command
1730+
assert not args
1731+
assert not line
1732+
1733+
def test_parseline(base_app):
1734+
statement = " command with 'partially completed quotes "
1735+
command, args, line = base_app.parseline(statement)
1736+
assert command == 'command'
1737+
assert args == "with 'partially completed quotes"
1738+
assert line == statement.strip()

0 commit comments

Comments
 (0)