Skip to content

Commit ddf4cc9

Browse files
authored
Merge pull request #13 from Samsu-F/parser_rewrite
Parser rewrite
2 parents 68b56f7 + 6e86aa1 commit ddf4cc9

File tree

3 files changed

+153
-25
lines changed

3 files changed

+153
-25
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,14 @@ Handles options with values
7676
<tr>
7777
<td width="50%">
7878

79-
### 🔀 Pipe Support
79+
### 🔀 Complex Syntax Support
8080

81-
Detects correct command in pipelines
81+
Detects correct command in pipelines, command substitutions & more
8282

8383
```
8484
cat file | grep -i → opens man grep
85-
tree | less -N → opens man less
85+
if true && ! [ -e → opens man test at -e
86+
printf "$(pwd; ls → opens man ls
8687
```
8788

8889
</td>

lib/parser.zsh

Lines changed: 145 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,154 @@
11
# lib/parser.zsh - Word and command parsing utilities
22
# Extracts words and commands from the command line buffer
33

4-
# Get the word at the current cursor position
5-
# Uses LBUFFER and RBUFFER which are ZLE special variables
6-
zvm_parse_word_at_cursor() {
7-
local left="${LBUFFER##*[[:space:]]}"
8-
local right="${RBUFFER%%[[:space:]]*}"
9-
echo "${left}${right}"
4+
# Calculates the index of the token under the cursor within the tokenization
5+
# (i.e. ${(z)myvar}) of the argument string
6+
zvm_token_index_at_stringpos(){
7+
local line="$1"
8+
local stringpos="$2"
9+
local left="${line[1,stringpos]}"
10+
local right="${line[stringpos+1,-1]}"
11+
local right_tokens=(${(Z+C+)right})
12+
local first_right_token="${right_tokens[1]:-}"
13+
local left_tokens=(${(Z+C+)left})
14+
if [[ $left == *[[:space:]] ]] || (( ${#left_tokens} == 0 )); then
15+
left+="${first_right_token}"
16+
left_tokens=(${(Z+C+)left})
17+
fi
18+
echo ${#left_tokens}
1019
}
1120

12-
# Get the current command segment (handles pipes)
13-
# Returns the text after the last pipe before cursor
14-
zvm_get_current_segment() {
15-
local segment="${LBUFFER##*|}"
16-
# Trim leading whitespace
17-
segment="${segment#"${segment%%[![:space:]]*}"}"
18-
echo "$segment"
21+
# returns with status 0 iff $1 is a token that separates different segments, e.g. ';'
22+
zvm_token_is_segment_separator() {
23+
case "$1" in
24+
'||'|'|'|'&'|'&&'|';'|'{'|'}'|'('|')'|'"')
25+
return 0
26+
;;
27+
esac
28+
return 1
29+
}
30+
31+
# returns with status 0 iff $1 is a reserved word in zsh that should be skipped by this plugin.
32+
# e.g., if you type "if ! test -f", the man page for test should be opened on '-f', instead of
33+
# searching for '-f' in the if man page, which is not even about the shell keyword 'if'
34+
zvm_token_is_skipped_resword() {
35+
if (( $reswords[(Ie)$1] )) && ! man -w 1 $1 &>/dev/null; then
36+
return 0
37+
fi
38+
return 1
39+
}
40+
41+
# Returns the command segment of the line $1 at stringpos $2.
42+
# This function tries to avoid returning segments containing separator tokens and
43+
# therefore, the returned segment may not necessarily be aroud stringpos.
44+
# Does not descend into tokens that are nested commands.
45+
zvm_segment_at_stringpos() {
46+
local line="$1"
47+
local stringpos=$2
48+
local -a tokens=(${(Z+C+)line})
49+
(( ${#tokens} == 0 )) && return
50+
local last_token_idx=$(zvm_token_index_at_stringpos "$line" $stringpos)
51+
if zvm_token_is_segment_separator "${tokens[last_token_idx]}" && (( last_token_idx < ${#tokens} )); then
52+
(( last_token_idx++ )) # use the next segment e.g. if cursor is immediately after a semicolon
53+
fi
54+
while (( last_token_idx > 1 )) && zvm_token_is_segment_separator "${tokens[last_token_idx]}"; do
55+
(( last_token_idx-- )) # we don't want the result segment to end with a separator
56+
done
57+
local first_token_idx=$last_token_idx
58+
while (( first_token_idx > 1 )) && ! zvm_token_is_segment_separator "${tokens[first_token_idx-1]}"; do
59+
(( first_token_idx-- )) # previous token also belongs to the segment
60+
done
61+
while (( first_token_idx < last_token_idx )) && zvm_token_is_skipped_resword "${tokens[first_token_idx]}"; do
62+
(( first_token_idx++ )) # current token is not part of the segment
63+
done
64+
local segment="${tokens[first_token_idx,last_token_idx]}"
65+
printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
66+
}
67+
68+
# in string $1, find the stringpos of the first unmatched closing ')' token.
69+
# prints -1 iff there is no such token
70+
# For example:
71+
# input 'foo)bar' --> output 4
72+
# input '(x))bar' --> output 4
73+
# input 'echo ")")' --> output 9
74+
# input 'echo ")"' --> output -1
75+
zvm_stringpos_of_closing_parenthesis() {
76+
local -i stringpos=1
77+
local -i nesting_depth=0
78+
local string="$1"
79+
while (( stringpos <= ${#string} )); do
80+
if [[ "${string[stringpos]}" == [[:space:]] ]]; then
81+
(( stringpos++ ))
82+
continue
83+
fi
84+
local -a tokens=(${(Z+C+)${string[stringpos,-1]}})
85+
local first_token="${tokens[1]}"
86+
if [[ "$first_token" == ')' ]] && (( nesting_depth == 0 )); then
87+
echo $stringpos
88+
return
89+
elif [[ "$first_token" == ')' ]]; then
90+
(( nesting_depth-- ))
91+
(( stringpos++ ))
92+
elif [[ "$first_token" == '(' ]]; then
93+
(( nesting_depth++ ))
94+
(( stringpos++ ))
95+
else
96+
(( stringpos+=${#first_token} ))
97+
fi
98+
done
99+
echo "-1"
100+
}
101+
102+
# Based on zvm_segment_at_stringpos but descends into nested subcommands
103+
zvm_nested_segment_at_stringpos() {
104+
local string="$1"
105+
local stringpos=$2
106+
local -i skipped_prefix=${3:-0}
107+
local segment="$(zvm_segment_at_stringpos "$string" $stringpos)"
108+
local -a segment_tokens=(${(Z+C+)segment})
109+
local last_segment_token="${segment_tokens[-1]:-}"
110+
local left="${string[1,stringpos]}"
111+
local -a left_tokens=(${(Z+C+)left})
112+
local last_left_token="${left_tokens[-1]:-}" # this is not necessarily part of the last segment token!
113+
if [[ "$last_segment_token" != "$last_left_token"* ]]; then
114+
# never descend if we are behind a separator ending the segment, e.g. if cursor is after the pipe in `echo $(ls) |`
115+
printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
116+
return
117+
fi
118+
local match mbegin mend # special parameters created by zsh when using [[ =~ ]]; for good style we don't want them to be global
119+
if (( skipped_prefix == 0 )) && [[ "$last_left_token" =~ '^(([^$\\`"'"']*('[^']*')*)*\"?)" ]]; then
120+
skipped_prefix=${#match[1]} # skip until first double quote, first unmatched single quote, or first special character outside of single quotes
121+
fi
122+
if [[ "$last_left_token" =~ '^.{'"$skipped_prefix"'}[^"`$\\'"']*'" ]]; then # if all relevant characters are after an unmatched single quote
123+
printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
124+
return
125+
fi
126+
if [[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\$\().*$' || \
127+
"$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*<\().*$' ]]; then
128+
local cutoff=${#match[1]} # the length of the prefix that we want to cut off. match is a special zsh variable
129+
local remaining_suffix="${last_segment_token[cutoff+1,-1]}" # the part of the last segment token after the opening $( or <(
130+
local stringpos_in_rem_suffix=$(( ${#last_left_token} - cutoff ))
131+
local pos_closing_parenthesis="$(zvm_stringpos_of_closing_parenthesis "$remaining_suffix")"
132+
if (( pos_closing_parenthesis > 0 && pos_closing_parenthesis <= stringpos_in_rem_suffix )); then
133+
# if the command substituation found is closed to the left of stringpos
134+
zvm_nested_segment_at_stringpos "$1" $2 $(( cutoff + pos_closing_parenthesis ))
135+
return $?
136+
else # if stringpos is within the command substitution found ==> descend into nested command
137+
string="${remaining_suffix[1,pos_closing_parenthesis]}" # pos_closing_parenthesis may be -1 if it does not exist ==> until end
138+
zvm_nested_segment_at_stringpos "$string" $stringpos_in_rem_suffix 0
139+
return $?
140+
fi
141+
elif [[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\$[^"`\(\\]).*$' ]] || \
142+
[[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\\.).*$' ]]; then
143+
# skip parameter expansion or backslash escaped character
144+
zvm_nested_segment_at_stringpos "$1" $2 ${#match[1]}
145+
return $?
146+
fi
147+
printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
19148
}
20149

21-
# Extract the command name from a segment
22-
# Takes the first word of the segment
23-
zvm_parse_command() {
24-
local segment
25-
segment=$(zvm_get_current_segment)
26-
echo "${segment%%[[:space:]]*}"
150+
zvm_get_current_segment() {
151+
zvm_nested_segment_at_stringpos "$BUFFER" $CURSOR
27152
}
28153

29154
# Determine the man page to open, checking for subcommands
@@ -32,6 +157,7 @@ zvm_parse_command() {
32157
zvm_determine_man_page() {
33158
local cmd="$1"
34159
local segment="$2"
160+
[[ "$cmd" == '[' ]] && cmd=test
35161
local man_page="$cmd"
36162

37163
local rest="${segment#*[[:space:]]}"

zsh-vi-man.zsh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,17 @@ source "${ZVM_LIB_DIR}/keybinding.zsh"
3434
# Main widget function - orchestrates the man page lookup
3535
function zvm-man() {
3636
# Parse current context
37-
local word=$(zvm_parse_word_at_cursor)
38-
local cmd=$(zvm_parse_command)
37+
local current_segment="$(zvm_get_current_segment)"
38+
local -a segment_tokens=(${(Z+C+)current_segment})
39+
local word="${segment_tokens[-1]:-}"
40+
local cmd="${segment_tokens[1]:-}"
3941

4042
if [[ -z "$cmd" ]]; then
4143
zle -M "No command found"
4244
return 1
4345
fi
4446

4547
# Determine the man page to open (may include subcommand)
46-
local current_segment=$(zvm_get_current_segment)
4748
local man_page=$(zvm_determine_man_page "$cmd" "$current_segment")
4849

4950
# Clear screen and open man page with appropriate pager

0 commit comments

Comments
 (0)