# lib/parser.zsh - Word and command parsing utilities
# Extracts words and commands from the command line buffer

# Calculates the index of the token under the cursor within the tokenization
# (i.e. ${(z)myvar}) of the argument string.
#
# Arguments:
#   $1 - the command line to tokenize
#   $2 - cursor position, i.e. the number of characters of $1 left of the cursor
# Outputs:
#   the index of that token on stdout (0 if no token is found)
zvm_token_index_at_stringpos() {
  local line="$1"
  local stringpos="$2"
  local left="${line[1,stringpos]}"
  local right="${line[stringpos+1,-1]}"
  # (Z+C+) splits like the shell lexer and drops comments
  local -a right_tokens=(${(Z+C+)right})
  local first_right_token="${right_tokens[1]:-}"
  local -a left_tokens=(${(Z+C+)left})
  # If the text left of the cursor ends in whitespace, or holds no token at all,
  # the first token right of the cursor is appended and counted as well.
  if [[ $left == *[[:space:]] ]] || (( ${#left_tokens} == 0 )); then
    left+="${first_right_token}"
    left_tokens=(${(Z+C+)left})
  fi
  printf '%s\n' "${#left_tokens}"
}

# Returns with status 0 iff $1 is a token that separates different segments, e.g. ';'.
#
# Arguments:
#   $1 - a single token
# Returns:
#   0 if $1 is exactly one of the separator tokens listed below, 1 otherwise
zvm_token_is_segment_separator() {
  case "$1" in
    '||'|'|'|'&'|'&&'|';'|'{'|'}'|'('|')'|'"')
      return 0
      ;;
  esac
  return 1
}

# Returns with status 0 iff $1 is a reserved word in zsh that should be skipped by this plugin.
# E.g., if you type "if ! test -f", the man page for test should be opened on '-f', instead of
# searching for '-f' in the if man page, which is not even about the shell keyword 'if'.
#
# Arguments:
#   $1 - a single token
# Returns:
#   0 if $1 is listed in $reswords and `man -w 1` reports no page for it,
#   1 otherwise
zvm_token_is_skipped_resword() {
  # $reswords is not defined in this file; presumably the array provided by
  # the zsh/parameter module. (Ie) gives the index of an exact match, 0 if none.
  (( ${reswords[(Ie)$1]} )) || return 1
  # A reserved word that has its own section-1 man page is not skipped.
  man -w 1 "$1" &> /dev/null && return 1
  return 0
}

# Returns the command segment of the line $1 at stringpos $2.
# This function tries to avoid returning segments containing separator tokens and
# therefore, the returned segment may not necessarily be around stringpos.
# Does not descend into tokens that are nested commands.
#
# Arguments:
#   $1 - the command line
#   $2 - cursor position within $1
# Outputs:
#   the tokens of the segment, joined with the first character of IFS, on
#   stdout without a trailing newline; nothing if $1 contains no tokens
zvm_segment_at_stringpos() {
  local line="$1"
  local stringpos=$2
  local -a tokens=(${(Z+C+)line})
  (( ${#tokens} == 0 )) && return
  local last_token_idx=$(zvm_token_index_at_stringpos "$line" "$stringpos")
  if zvm_token_is_segment_separator "${tokens[last_token_idx]}" && (( last_token_idx < ${#tokens} )); then
    (( last_token_idx++ )) # use the next segment e.g. if cursor is immediately after a semicolon
  fi
  while (( last_token_idx > 1 )) && zvm_token_is_segment_separator "${tokens[last_token_idx]}"; do
    (( last_token_idx-- )) # we don't want the result segment to end with a separator
  done
  local first_token_idx=$last_token_idx
  while (( first_token_idx > 1 )) && ! zvm_token_is_segment_separator "${tokens[first_token_idx-1]}"; do
    (( first_token_idx-- )) # previous token also belongs to the segment
  done
  while (( first_token_idx < last_token_idx )) && zvm_token_is_skipped_resword "${tokens[first_token_idx]}"; do
    (( first_token_idx++ )) # current token is not part of the segment
  done
  local segment="${tokens[first_token_idx,last_token_idx]}"
  printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
}

# In string $1, find the stringpos of the first unmatched closing ')' token.
# Prints -1 iff there is no such token.
# For example:
#   input 'foo)bar'   --> output 4
#   input '(x))bar'   --> output 4
#   input 'echo ")")' --> output 9
#   input 'echo ")"'  --> output -1
#
# Arguments:
#   $1 - the string to scan
# Outputs:
#   the 1-based position of the unmatched ')' on stdout, or -1
zvm_stringpos_of_closing_parenthesis() {
  local string="$1"
  local -i stringpos=1
  local -i nesting_depth=0
  local -a tokens
  local first_token
  while (( stringpos <= ${#string} )); do
    if [[ "${string[stringpos]}" == [[:space:]] ]]; then
      (( stringpos++ ))
      continue
    fi
    # Re-tokenize the remainder so that quoting is respected: a ')' inside
    # quotes is part of a longer token and is skipped as a whole.
    tokens=(${(Z+C+)${string[stringpos,-1]}})
    first_token="${tokens[1]}"
    if (( ${#first_token} == 0 )); then
      # The remainder yields no token at all (e.g. it is a comment, which
      # (Z+C+) strips). Nothing is left to find, and advancing by a length
      # of 0 would loop forever.
      break
    elif [[ "$first_token" == ')' ]] && (( nesting_depth == 0 )); then
      printf '%s\n' "$stringpos"
      return
    elif [[ "$first_token" == ')' ]]; then
      (( nesting_depth-- ))
      (( stringpos++ ))
    elif [[ "$first_token" == '(' ]]; then
      (( nesting_depth++ ))
      (( stringpos++ ))
    else
      (( stringpos += ${#first_token} ))
    fi
  done
  printf '%s\n' "-1"
}

# Based on zvm_segment_at_stringpos but descends into nested subcommands,
# i.e. into $( ... ) and <( ... ) constructs inside the token under the cursor.
#
# Arguments:
#   $1 - the command line
#   $2 - cursor position within $1
#   $3 - optional, default 0: number of leading characters of the token under
#        the cursor that are skipped when searching; set by the recursive calls
# Outputs:
#   the segment on stdout, without a trailing newline
zvm_nested_segment_at_stringpos() {
  local string="$1"
  local stringpos=$2
  local -i skipped_prefix=${3:-0}
  local segment="$(zvm_segment_at_stringpos "$string" "$stringpos")"
  local -a segment_tokens=(${(Z+C+)segment})
  local last_segment_token="${segment_tokens[-1]:-}"
  local left="${string[1,stringpos]}"
  local -a left_tokens=(${(Z+C+)left})
  local last_left_token="${left_tokens[-1]:-}" # this is not necessarily part of the last segment token!
  if [[ "$last_segment_token" != "$last_left_token"* ]]; then
    # never descend if we are behind a separator ending the segment, e.g. if cursor is after the pipe in `echo $(ls) |`
    printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
    return
  fi
  local match mbegin mend # special parameters created by zsh when using [[ =~ ]]; for good style we don't want them to be global
  if (( skipped_prefix == 0 )) && [[ "$last_left_token" =~ '^(([^$\\`"'"']*('[^']*')*)*\"?)" ]]; then
    skipped_prefix=${#match[1]} # skip until first double quote, first unmatched single quote, or first special character outside of single quotes
  fi
  if [[ "$last_left_token" =~ '^.{'"$skipped_prefix"'}[^"`$\\'"']*'" ]]; then # if all relevant characters are after an unmatched single quote
    printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
    return
  fi
  if [[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\$\().*$' || \
        "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*<\().*$' ]]; then
    local cutoff=${#match[1]} # the length of the prefix that we want to cut off. match is a special zsh variable
    local remaining_suffix="${last_segment_token[cutoff+1,-1]}" # the part of the last segment token after the opening $( or <(
    local stringpos_in_rem_suffix=$(( ${#last_left_token} - cutoff ))
    local pos_closing_parenthesis="$(zvm_stringpos_of_closing_parenthesis "$remaining_suffix")"
    if (( pos_closing_parenthesis > 0 && pos_closing_parenthesis <= stringpos_in_rem_suffix )); then
      # if the command substitution found is closed to the left of stringpos:
      # search again on the same input, skipping past that closing parenthesis
      zvm_nested_segment_at_stringpos "$1" "$2" $(( cutoff + pos_closing_parenthesis ))
      return $?
    else # if stringpos is within the command substitution found ==> descend into nested command
      string="${remaining_suffix[1,pos_closing_parenthesis]}" # pos_closing_parenthesis may be -1 if it does not exist ==> until end
      zvm_nested_segment_at_stringpos "$string" "$stringpos_in_rem_suffix" 0
      return $?
    fi
  elif [[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\$[^"`\(\\]).*$' ]] || \
       [[ "$last_left_token" =~ '^(.{'"$skipped_prefix"'}[^"`$\\]*\\.).*$' ]]; then
    # skip parameter expansion or backslash escaped character
    zvm_nested_segment_at_stringpos "$1" "$2" ${#match[1]}
    return $?
  fi
  printf '%s' "$segment" # to prevent escape sequence interpretation, do not use echo here
}

# Prints the command segment at the cursor position of the current command line.
# Passes the parameters BUFFER (the line) and CURSOR (the position) to
# zvm_nested_segment_at_stringpos, so nested subcommands are descended into.
#
# Outputs:
#   whatever zvm_nested_segment_at_stringpos prints for "$BUFFER" and "$CURSOR"
zvm_get_current_segment() {
  zvm_nested_segment_at_stringpos "$BUFFER" "$CURSOR"
}

# Determine the man page to open, checking for subcommands
@@ -32,6 +157,7 @@ zvm_parse_command() {
32157zvm_determine_man_page () {
33158 local cmd=" $1 "
34159 local segment=" $2 "
160+ [[ " $cmd " == ' [' ]] && cmd=test
35161 local man_page=" $cmd "
36162
37163 local rest=" ${segment#* [[:space:]]} "
0 commit comments