Skip to content

Commit b42298e

Browse files
committed
Allow single-char identifiers per Lubos
... but add exceptions for getopts-style single-character syntax such as "-x" or "x)". Also, space out regexes from JFlex braces for readability.
1 parent a723156 commit b42298e

File tree

5 files changed

+69
-68
lines changed

5 files changed

+69
-68
lines changed

src/org/opensolaris/opengrok/analysis/sh/Sh.lexh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@ Number = \$? [0-9]+\.[0-9]+| [0-9][0-9]* | [0][xX] [0-9a-fA-F]+
2828

2929
/*
3030
* Rather than enumerate letters, just treat all hyphen-single-char as a
31-
* unary op.
31+
* unary op. "req_lookahead" because an assertion is needed that it is followed
32+
* by non-word or end-of-line.
3233
*/
33-
Unary_op = [\-][A-Za-z]{WhspChar}
34+
Unary_op_req_lookahead = [\-]{Unary_op_char}
35+
Unary_op_char = [A-Za-z]
3436

3537
Binary_op = [\-]("ef"|"nt"|"ot"|"eq"|"ge"|"gt"|"le"|"lt"|"ne"){WhspChar}

src/org/opensolaris/opengrok/analysis/sh/ShSymbolTokenizer.lex

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ super(in);
4444
%%
4545

4646
<YYINITIAL> {
47-
{Identifier} {String id = yytext();
47+
{Identifier} {
48+
String id = yytext();
4849
if(!Consts.shkwd.contains(id)){
4950
setAttribs(id, yychar, yychar + yylength());
5051
return yystate(); }
@@ -54,17 +55,27 @@ super(in);
5455
\' { yybegin(QSTRING); }
5556
"#" { yybegin(SCOMMENT); }
5657

57-
{Unary_op} |
58-
{Binary_op} {}
58+
{Unary_op_req_lookahead} / \W {
59+
// noop
60+
}
61+
{Unary_op_req_lookahead} $ {
62+
// noop
63+
}
64+
{WhiteSpace} {Unary_op_char} / ")" {
65+
// noop
66+
}
67+
{Binary_op} {
68+
// noop
69+
}
5970
}
6071

6172
<STRING> {
62-
"$" {Identifier} {
73+
"$" {Identifier} {
6374
setAttribs(yytext().substring(1), yychar + 1, yychar + yylength());
6475
return yystate();
6576
}
6677

67-
"${" {Identifier} "}" {
78+
"${" {Identifier} "}" {
6879
int startOffset = 2; // trim away the "${" prefix
6980
int endOffset = yylength() - 1; // trim away the "}" suffix
7081
setAttribs(yytext().substring(startOffset, endOffset),

src/org/opensolaris/opengrok/analysis/sh/ShXref.lex

Lines changed: 47 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -120,46 +120,33 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
120120
%include Sh.lexh
121121
%%
122122
<STRING>{
123-
"$" {Identifier} {
123+
"$" {Identifier} {
124124
String id = yytext();
125-
// For historical reasons, ShXref will not link identifiers of length=1
126-
// (or of length=2 with a leading '$')
127-
if (id.length() > 2) {
128-
out.write("<a href=\"");
129-
out.write(urlPrefix);
130-
out.write("refs=");
131-
out.write(id);
132-
appendProject();
133-
out.write("\">");
134-
out.write(id);
135-
out.write("</a>");
136-
} else {
137-
out.write(id);
138-
}
125+
out.write("<a href=\"");
126+
out.write(urlPrefix);
127+
out.write("refs=");
128+
out.write(id);
129+
appendProject();
130+
out.write("\">");
131+
out.write(id);
132+
out.write("</a>");
139133
}
140134

141135
/* This rule matches associative arrays inside strings,
142136
for instance "${array["string"]}". Push a new STRING
143137
state on the stack to prevent premature exit from the
144138
STRING state. */
145-
\$\{ {Identifier} \[\" {
139+
\$\{ {Identifier} \[\" {
146140
out.write(htmlize(yytext()));
147141
pushSpan(STRING, HtmlConsts.STRING_CLASS);
148142
}
149143
}
150144

151145
<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
152-
\$ ? {Identifier} {
146+
\$ ? {Identifier} {
153147
String id = yytext();
154-
// For historical reasons, ShXref will not link identifiers of length=1
155-
int minlength = 1;
156-
if (id.startsWith("$")) ++minlength;
157-
if (id.length() > minlength) {
158-
writeSymbol(id, Consts.shkwd, yyline);
159-
} else {
160-
out.write(id);
161-
}
162-
}
148+
writeSymbol(id, Consts.shkwd, yyline);
149+
}
163150

164151
{Number} {
165152
String lastClassName = getDisjointSpanClassName();
@@ -168,11 +155,11 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
168155
disjointSpan(lastClassName);
169156
}
170157

171-
\$ ? \" {
158+
\$ ? \" {
172159
pushSpan(STRING, HtmlConsts.STRING_CLASS);
173160
out.write(htmlize(yytext()));
174161
}
175-
\$ ? \' {
162+
\$ ? \' {
176163
pushSpan(QSTRING, HtmlConsts.STRING_CLASS);
177164
out.write(htmlize(yytext()));
178165
}
@@ -182,7 +169,7 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
182169
}
183170

184171
// Recognize here-documents. At least a subset of them.
185-
"<<" "-"? {WhspChar}* {Identifier} {WhspChar}* {
172+
"<<" "-"? {WhspChar}* {Identifier} {WhspChar}* {
186173
String text = yytext();
187174
out.write(htmlize(text));
188175

@@ -193,21 +180,29 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
193180

194181
// Any sequence of more than two < characters should not start HEREDOC. Use
195182
// this rule to catch them before the HEREDOC rule.
196-
"<<" "<" + {
183+
"<<" "<" + {
197184
out.write(htmlize(yytext()));
198185
}
199186

200-
{Unary_op} |
187+
{Unary_op_req_lookahead} / \W {
188+
out.write(yytext());
189+
}
190+
{Unary_op_req_lookahead} $ {
191+
out.write(yytext());
192+
}
193+
{WhiteSpace} {Unary_op_char} / ")" {
194+
out.write(yytext());
195+
}
201196
{Binary_op} {
202197
out.write(yytext());
203198
}
204199
}
205200

206201
<STRING> {
207202
\\[\"\$\`\\] |
208-
\" {WhspChar}* \" { out.write(htmlize(yytext())); }
203+
\" {WhspChar}* \" { out.write(htmlize(yytext())); }
209204
\" { out.write(htmlize(yytext())); yypop(); }
210-
\$\( {
205+
\$\( {
211206
pushSpan(SUBSHELL, null);
212207
out.write(yytext());
213208
}
@@ -220,16 +215,16 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
220215
* to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
221216
* the opening brace must be followed by a blank.
222217
*/
223-
"${" / {WhspChar} | {EOL} {
218+
"${" / {WhspChar} | {EOL} {
224219
pushSpan(BRACEGROUP, null);
225220
out.write(yytext());
226221
}
227222
}
228223

229224
<QSTRING> {
230225
\\[\'] |
231-
\' {WhspChar}* \' { out.write(htmlize(yytext())); }
232-
\' { out.write(htmlize(yytext())); yypop(); }
226+
\' {WhspChar}* \' { out.write(htmlize(yytext())); }
227+
\' { out.write(htmlize(yytext())); yypop(); }
233228
}
234229

235230
<SCOMMENT> {
@@ -244,7 +239,7 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
244239
}
245240

246241
<BACKQUOTE> {
247-
` { out.write(yytext()); yypop(); }
242+
[`] { out.write(yytext()); yypop(); }
248243
}
249244

250245
<BRACEGROUP> {
@@ -253,45 +248,45 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
253248
* the closing brace must be on beginning of line, or it must be preceded by
254249
* a semi-colon and (optionally) whitespace.
255250
*/
256-
^ {WhspChar}* \} { out.write(yytext()); yypop(); }
257-
; {WhspChar}* \} { out.write(yytext()); yypop(); }
251+
^ {WhspChar}* \} { out.write(yytext()); yypop(); }
252+
; {WhspChar}* \} { out.write(yytext()); yypop(); }
258253
}
259254

260255
<HEREDOC> {
261-
[^\n]+ {
256+
[^\n]+ {
262257
String line = yytext();
263258
if (isHeredocStopWord(line)) {
264259
yypop();
265260
}
266261
out.write(htmlize(line));
267262
}
268263

269-
{EOL} { startNewLine(); }
264+
{EOL} { startNewLine(); }
270265
}
271266

272267
<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
273268
/* Don't enter new state if special character is escaped. */
274269
\\[`\)\(\{\"\'\$\#\\] { out.write(htmlize(yytext())); }
275270

276271
/* $# should not start a comment. */
277-
"$#" { out.write(yytext()); }
272+
"$#" { out.write(yytext()); }
278273

279-
\$ ? \( { pushSpan(SUBSHELL, null); out.write(yytext()); }
280-
` { pushSpan(BACKQUOTE, null); out.write(yytext()); }
274+
\$ ? \( { pushSpan(SUBSHELL, null); out.write(yytext()); }
275+
[`] { pushSpan(BACKQUOTE, null); out.write(yytext()); }
281276

282277
/* Bug #15661: Recognize ksh command substitution within strings. According
283278
* to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
284279
* the opening brace must be followed by a blank. Make the initial dollar sign
285280
* optional so that we get the nesting right and don't terminate the brace
286281
* group too early if the ${ cmd; } expression contains nested { cmd; } groups.
287282
*/
288-
\$ ? \{ / {WhspChar} | {EOL} {
283+
\$ ? \{ / {WhspChar} | {EOL} {
289284
pushSpan(BRACEGROUP, null); out.write(yytext());
290285
}
291286
}
292287

293288
<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP, STRING, SCOMMENT, QSTRING> {
294-
{File} {
289+
{File} {
295290
String path = yytext();
296291
out.write("<a href=\""+urlPrefix+"path=");
297292
out.write(path);
@@ -301,19 +296,18 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
301296
out.write("</a>");
302297
}
303298

304-
{RelaxedMiddleFPath}
305-
{ out.write(Util.breadcrumbPath(urlPrefix+"path=",yytext(),'/'));}
299+
{RelaxedMiddleFPath} {
300+
out.write(Util.breadcrumbPath(urlPrefix + "path=", yytext(), '/')); }
306301

307302
[&<>\'\"] { out.write(htmlize(yytext())); }
308303
{WhspChar}*{EOL} { startNewLine(); }
309-
{WhiteSpace} { out.write(yytext()); }
310-
[!-~] { out.write(yycharat(0)); }
311-
[^\n] { writeUnicodeChar(yycharat(0)); }
304+
{WhiteSpace} { out.write(yytext()); }
305+
[!-~] { out.write(yycharat(0)); }
306+
[^\n] { writeUnicodeChar(yycharat(0)); }
312307
}
313308

314309
<STRING, SCOMMENT, QSTRING> {
315-
{FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+
316-
{
310+
{FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+ {
317311
writeEMailAddress(yytext());
318312
}
319313
}

test/org/opensolaris/opengrok/analysis/sh/sample_xref.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,10 @@
114114
<a class="l" name="106" href="#106">106</a><b>while</b> <b>getopts</b> <a href="/source/s?defs=cfm" class="intelliWindow-symbol" data-definition-place="undefined-in-file">cfm</a>:<a href="/source/s?defs=px" class="intelliWindow-symbol" data-definition-place="undefined-in-file">px</a>: <a href="/source/s?defs=flag" class="intelliWindow-symbol" data-definition-place="undefined-in-file">flag</a>; <b>do</b>
115115
<a class="l" name="107" href="#107">107</a> <b>case</b> <a href="/source/s?defs=$flag" class="intelliWindow-symbol" data-definition-place="undefined-in-file">$flag</a> <b>in</b>
116116
<a class="l" name="108" href="#108">108</a> c)
117-
<a class="l" name="109" href="#109">109</a> <a href="/source/s?defs=clobber" class="intelliWindow-symbol" data-definition-place="undefined-in-file">clobber</a>=y
117+
<a class="l" name="109" href="#109">109</a> <a href="/source/s?defs=clobber" class="intelliWindow-symbol" data-definition-place="undefined-in-file">clobber</a>=<a href="/source/s?defs=y" class="intelliWindow-symbol" data-definition-place="undefined-in-file">y</a>
118118
<a class="hl" name="110" href="#110">110</a> ;;
119119
<a class="l" name="111" href="#111">111</a> f)
120-
<a class="l" name="112" href="#112">112</a> <a href="/source/s?defs=noflg" class="intelliWindow-symbol" data-definition-place="undefined-in-file">noflg</a>=y
120+
<a class="l" name="112" href="#112">112</a> <a href="/source/s?defs=noflg" class="intelliWindow-symbol" data-definition-place="undefined-in-file">noflg</a>=<a href="/source/s?defs=y" class="intelliWindow-symbol" data-definition-place="undefined-in-file">y</a>
121121
<a class="l" name="113" href="#113">113</a> ;;
122122
<a class="l" name="114" href="#114">114</a> m)
123123
<a class="l" name="115" href="#115">115</a> <a href="/source/s?defs=XRMAKEFILE" class="intelliWindow-symbol" data-definition-place="undefined-in-file">XRMAKEFILE</a>=<a href="/source/s?defs=$OPTARG" class="intelliWindow-symbol" data-definition-place="undefined-in-file">$OPTARG</a>

test/org/opensolaris/opengrok/analysis/sh/samplesymbols.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ src
5050
SRC
5151
MACH
5252
uname
53-
p
5453
MACH
5554
XREFMK
5655
fail
@@ -61,21 +60,16 @@ cfm # 106:while getopts cfm:px: flag; do
6160
px
6261
flag
6362
flag
64-
c
6563
clobber
6664
y
67-
f
6865
noflg
6966
y
70-
m
7167
XRMAKEFILE
7268
OPTARG
73-
p
7469
ENVCPPFLAGS1
7570
ENVCPPFLAGS2
7671
ENVCPPFLAGS3
7772
ENVCPPFLAGS4
78-
x
7973
xrefs
8074
OPTARG
8175
PROG

0 commit comments

Comments
 (0)