Skip to content

Commit 2488ee1

Browse files
committed
Rewrite command-line splitting to tokenize quoted strings
This replaces the previous simple "split on breaks" regex with a function that uses a more complete regex and returns token shapes. This vastly improves the readability of CompletionContext::splitCommand(). Partially resolves #67 (this doesn't support output of quoted strings yet).
1 parent cd73886 commit 2488ee1

File tree

3 files changed

+174
-37
lines changed

3 files changed

+174
-37
lines changed

src/CompletionContext.php

Lines changed: 121 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class CompletionContext
6161
*
6262
* @var string
6363
*/
64-
protected $wordBreaks = "'\"()= \t\n";
64+
protected $wordBreaks = "= \t\n";
6565

6666
/**
6767
* Set the whole contents of the command line as a string
@@ -178,12 +178,15 @@ public function setCharIndex($index)
178178
* This defaults to a sane value based on BASH's word break characters and shouldn't
179179
* need to be changed unless your completions contain the default word break characters.
180180
*
181+
* @deprecated This is becoming an internal setting that doesn't make sense to expose publicly.
182+
*
181183
* @see wordBreaks
182184
* @param string $charList - a single string containing all of the characters to break words on
183185
*/
184186
public function setWordBreaks($charList)
185187
{
186-
$this->wordBreaks = $charList;
188+
// Drop quotes from break characters - strings are handled separately to word breaks now
189+
$this->wordBreaks = str_replace(array('"', '\''), '', $charList);;
187190
$this->reset();
188191
}
189192

@@ -194,55 +197,136 @@ public function setWordBreaks($charList)
194197
*/
195198
protected function splitCommand()
196199
{
197-
$this->words = array();
198-
$this->wordIndex = null;
199-
$cursor = 0;
200-
201-
$breaks = preg_quote($this->wordBreaks);
202-
203-
if (!preg_match_all("/([^$breaks]*)([$breaks]*)/", $this->commandLine, $matches)) {
204-
return;
205-
}
206-
207-
// Groups:
208-
// 1: Word
209-
// 2: Break characters
210-
foreach ($matches[0] as $index => $wholeMatch) {
211-
// Determine which word the cursor is in
212-
$cursor += strlen($wholeMatch);
213-
$word = $matches[1][$index];
214-
$breaks = $matches[2][$index];
215-
216-
if ($this->wordIndex === null && $cursor >= $this->charIndex) {
217-
$this->wordIndex = $index;
200+
$tokens = $this->tokenizeString($this->commandLine);
218201

219-
// Find the user's cursor position relative to the end of this word
220-
// The end of the word is the internal cursor minus any break characters that were captured
221-
$cursorWordOffset = $this->charIndex - ($cursor - strlen($breaks));
202+
foreach ($tokens as $token) {
203+
if ($token['type'] != 'break') {
204+
$this->words[] = $this->getTokenValue($token);
205+
}
222206

223-
if ($cursorWordOffset < 0) {
224-
// Cursor is inside the word - truncate the word at the cursor
225-
// (This emulates normal BASH completion behaviour I've observed, though I'm not entirely sure if it's useful)
226-
$word = substr($word, 0, strlen($word) + $cursorWordOffset);
207+
// Determine which word index the cursor is inside once we reach its offset
208+
if ($this->wordIndex === null && $this->charIndex <= $token['offsetEnd']) {
209+
$this->wordIndex = count($this->words) - 1;
227210

228-
} elseif ($cursorWordOffset > 0) {
211+
if ($token['type'] == 'break') {
229212
// Cursor is in the break-space after a word
230213
// Push an empty word at the cursor to allow completion of new terms at the cursor, ignoring words ahead
231214
$this->wordIndex++;
232-
$this->words[] = $word;
233215
$this->words[] = '';
234216
continue;
235217
}
236-
}
237218

238-
if ($word !== '') {
239-
$this->words[] = $word;
219+
if ($this->charIndex < $token['offsetEnd']) {
220+
// Cursor is inside the current word - truncate the word at the cursor
221+
// (This emulates normal BASH completion behaviour I've observed, though I'm not entirely sure if it's useful)
222+
$relativeOffset = $this->charIndex - $token['offset'];
223+
$truncated = substr($token['value'], 0, $relativeOffset);
224+
225+
$this->words[$this->wordIndex] = $truncated;
226+
}
240227
}
241228
}
242229

243-
if ($this->wordIndex > count($this->words) - 1) {
244-
$this->wordIndex = count($this->words) - 1;
230+
// Cursor position is past the end of the command line string - consider it a new word
231+
if ($this->wordIndex === null) {
232+
$this->wordIndex = count($this->words);
233+
$this->words[] = '';
234+
}
235+
}
236+
237+
/**
238+
* Return a token's value with escaping and quotes removed
239+
*
240+
* @see self::tokenizeString()
241+
* @param array $token
242+
* @return string
243+
*/
244+
protected function getTokenValue($token)
245+
{
246+
$value = $token['value'];
247+
248+
// Remove outer quote characters (or first quote if unclosed)
249+
if ($token['type'] == 'quoted') {
250+
$value = preg_replace('/^(?:[\'"])(.*?)(?:[\'"])?$/', '$1', $value);
245251
}
252+
253+
// Remove escape characters
254+
$value = preg_replace('/\\\\(.)/', '$1', $value);
255+
256+
return $value;
257+
}
258+
259+
/**
260+
* Break a string into words, quoted strings and non-words (breaks)
261+
*
262+
* Returns an array of unmodified segments of $string with offset and type information.
263+
*
264+
* @param string $string
265+
* @return array as [ [type => string, value => string, offset => int, offsetEnd => int], ... ]
266+
*/
267+
protected function tokenizeString($string)
268+
{
269+
// Map capture groups to returned token type
270+
$typeMap = array(
271+
'double_quote_string' => 'quoted',
272+
'single_quote_string' => 'quoted',
273+
'word' => 'word',
274+
'break' => 'break',
275+
);
276+
277+
// Escape every word break character including whitespace
278+
// preg_quote won't work here as it doesn't understand the ignore whitespace flag ("x")
279+
$breaks = preg_replace('/(.)/', '\\\$1', $this->wordBreaks);
280+
281+
$pattern = <<<"REGEX"
282+
/(?:
283+
(?P<double_quote_string>
284+
"(\\\\.|[^\"\\\\])*(?:"|$)
285+
) |
286+
(?P<single_quote_string>
287+
'(\\\\.|[^'\\\\])*(?:'|$)
288+
) |
289+
(?P<word>
290+
(?:\\\\.|[^$breaks])+
291+
) |
292+
(?P<break>
293+
[$breaks]+
294+
)
295+
)/x
296+
REGEX;
297+
298+
$tokens = array();
299+
300+
if (!preg_match_all($pattern, $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
301+
return $tokens;
302+
}
303+
304+
foreach ($matches as $set) {
305+
foreach ($set as $groupName => $match) {
306+
307+
// Ignore integer indices preg_match_all outputs (duplicates of named groups)
308+
if (is_integer($groupName)) {
309+
continue;
310+
}
311+
312+
// Skip if the offset indicates this group didn't match
313+
if ($match[1] === -1) {
314+
continue;
315+
}
316+
317+
$tokens[] = array(
318+
'type' => $typeMap[$groupName],
319+
'value' => $match[0],
320+
'offset' => $match[1],
321+
'offsetEnd' => $match[1] + strlen($match[0])
322+
);
323+
324+
// Move to the next set (only one group should match per set)
325+
continue;
326+
}
327+
}
328+
329+
return $tokens;
246330
}
247331

248332
/**

tests/Stecman/Component/Symfony/Console/BashCompletion/CompletionContextTest.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,44 @@ public function testWordBreakingWithSmallInputs()
9292
$this->assertEquals('', $context->getCurrentWord());
9393
}
9494

95+
public function testQuotedStringWordBreaking()
96+
{
97+
$context = new CompletionContext();
98+
$context->setCharIndex(1000);
99+
$context->setCommandLine('make horse --legs=3 --name="Jeff the horse" --colour Extreme\ Blanc \'foo " bar\'');
100+
101+
// Ensure escaped spaces and quoted strings are kept together as single words
102+
$this->assertEquals(
103+
array(
104+
'make',
105+
'horse',
106+
'--legs',
107+
'3',
108+
'--name',
109+
'Jeff the horse',
110+
'--colour',
111+
'Extreme Blanc',
112+
'foo " bar',
113+
'',
114+
),
115+
$context->getWords()
116+
);
117+
118+
$context = new CompletionContext();
119+
$context->setCommandLine('console --tag=');
120+
121+
// Cursor after equals symbol on option argument
122+
$context->setCharIndex(14);
123+
$this->assertEquals(
124+
array(
125+
'console',
126+
'--tag',
127+
''
128+
),
129+
$context->getWords()
130+
);
131+
}
132+
95133
public function testConfigureFromEnvironment()
96134
{
97135
putenv("CMDLINE_CONTENTS=beam up li");

tests/Stecman/Component/Symfony/Console/BashCompletion/CompletionHandlerTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,21 @@ public function testCompleteOptionFull()
8080
$this->assertArraySubset(array('--jazz-hands'), $this->getTerms($handler->runCompletion()));
8181
}
8282

83+
public function testCompleteOptionEqualsValue()
84+
{
85+
// Cursor at the "=" sign
86+
$handler = $this->createHandler('app completion-aware --option-with-suggestions=');
87+
$this->assertEquals(array('one-opt', 'two-opt'), $this->getTerms($handler->runCompletion()));
88+
89+
// Cursor at an opening quote
90+
$handler = $this->createHandler('app completion-aware --option-with-suggestions="');
91+
$this->assertEquals(array('one-opt', 'two-opt'), $this->getTerms($handler->runCompletion()));
92+
93+
// Cursor inside a quote with value
94+
$handler = $this->createHandler('app completion-aware --option-with-suggestions="two');
95+
$this->assertEquals(array('two-opt'), $this->getTerms($handler->runCompletion()));
96+
}
97+
8398
public function testCompleteOptionOrder()
8499
{
85100
// Completion of options should be able to happen anywhere after the command name

0 commit comments

Comments
 (0)