Skip to content

Commit 029eb5b

Browse files
committed
Add ShSymbolTokenizerTest, and fix bugs
- "in" should be a keyword. - ShSymbolTokenizer should recognize and ignore {Number}. - Unary and binary shell ops (e.g., -f or -eq) should not produce tokens. - ShSymbolTokenizer should recognize escape-apostrophe, as ShXref does. - Escape all HTML special characters in ShXref.
1 parent e506a23 commit 029eb5b

File tree

9 files changed

+434
-89
lines changed

9 files changed

+434
-89
lines changed

src/org/opensolaris/opengrok/analysis/sh/Consts.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
/*
2121
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
2222
* Use is subject to license terms.
23+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
2324
*/
2425

2526
package org.opensolaris.opengrok.analysis.sh;
@@ -144,5 +145,6 @@ public class Consts{
144145
shkwd.add( "void" );
145146
shkwd.add( "unsigned" );
146147
shkwd.add( "nameref" );
148+
shkwd.add( "in" );
147149
}
148150
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*/
24+
25+
Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
26+
27+
Number = \$? [0-9]+\.[0-9]+| [0-9][0-9]* | [0][xX] [0-9a-fA-F]+
28+
29+
/*
30+
* Rather than enumerate letters, just treat all hyphen-single-char as a
31+
* unary op.
32+
*/
33+
Unary_op = [\-][A-Za-z]{WhspChar}
34+
35+
Binary_op = [\-]("ef"|"nt"|"ot"|"eq"|"ge"|"gt"|"le"|"lt"|"ne"){WhspChar}

src/org/opensolaris/opengrok/analysis/sh/ShSymbolTokenizer.lex

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ super(in);
3737
%include CommonTokenizer.lexh
3838
%char
3939

40-
Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
41-
4240
%state STRING COMMENT SCOMMENT QSTRING
4341

42+
%include Common.lexh
43+
%include Sh.lexh
4444
%%
4545

4646
<YYINITIAL> {
@@ -49,16 +49,20 @@ Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
4949
setAttribs(id, yychar, yychar + yylength());
5050
return yystate(); }
5151
}
52+
{Number} {}
5253
\" { yybegin(STRING); }
5354
\' { yybegin(QSTRING); }
5455
"#" { yybegin(SCOMMENT); }
56+
57+
{Unary_op} |
58+
{Binary_op} {}
5559
}
5660

5761
<STRING> {
5862
"$" {Identifier} {
5963
setAttribs(yytext().substring(1), yychar + 1, yychar + yylength());
6064
return yystate();
61-
}
65+
}
6266

6367
"${" {Identifier} "}" {
6468
int startOffset = 2; // trim away the "${" prefix
@@ -67,18 +71,20 @@ Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
6771
yychar + startOffset,
6872
yychar + endOffset);
6973
return yystate();
70-
}
74+
}
7175

7276
\" { yybegin(YYINITIAL); }
73-
\\\\ | \\\" {}
77+
\\[\"\$\`\\] {}
7478
}
7579

7680
<QSTRING> {
81+
\\[\'] {}
7782
\' { yybegin(YYINITIAL); }
7883
}
7984

8085
<SCOMMENT> {
81-
\n { yybegin(YYINITIAL);}
86+
{WhiteSpace} {}
87+
{EOL} { yybegin(YYINITIAL);}
8288
}
8389

8490
<YYINITIAL, STRING, SCOMMENT, QSTRING> {

src/org/opensolaris/opengrok/analysis/sh/ShXref.lex

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,6 @@ import org.opensolaris.opengrok.web.Util;
9595

9696
%}
9797

98-
Identifier = [a-zA-Z_] [a-zA-Z0-9_]+
99-
Number = \$? [0-9]+\.[0-9]+| [0-9][0-9]* | [0][xX] [0-9a-fA-F]+
100-
10198
File = {FNameChar}+ "." ([a-zA-Z]+)
10299

103100
/*
@@ -119,34 +116,48 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
119116
%include CommonURI.lexh
120117
%include CommonPath.lexh
121118
%include CommonLaxFPath.lexh
119+
%include Sh.lexh
122120
%%
123121
<STRING>{
124122
"$" {Identifier} {
125123
String id = yytext();
126-
out.write("<a href=\"");
127-
out.write(urlPrefix);
128-
out.write("refs=");
129-
out.write(id);
130-
appendProject();
131-
out.write("\">");
132-
out.write(id);
133-
out.write("</a>");
124+
// For historical reasons, ShXref will not link identifiers of length=1
125+
// (or of length=2 with a leading '$')
126+
if (id.length() > 2) {
127+
out.write("<a href=\"");
128+
out.write(urlPrefix);
129+
out.write("refs=");
130+
out.write(id);
131+
appendProject();
132+
out.write("\">");
133+
out.write(id);
134+
out.write("</a>");
135+
} else {
136+
out.write(id);
137+
}
134138
}
135139

136140
/* This rule matches associative arrays inside strings,
137141
for instance "${array["string"]}". Push a new STRING
138142
state on the stack to prevent premature exit from the
139143
STRING state. */
140144
\$\{ {Identifier} \[\" {
141-
out.write(yytext());
145+
out.write(htmlize(yytext()));
142146
pushSpan(STRING, HtmlConsts.STRING_CLASS);
143147
}
144148
}
145149

146150
<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
147151
\$ ? {Identifier} {
148152
String id = yytext();
149-
writeSymbol(id, Consts.shkwd, yyline);
153+
// For historical reasons, ShXref will not link identifiers of length=1
154+
int minlength = 1;
155+
if (id.startsWith("$")) ++minlength;
156+
if (id.length() > minlength) {
157+
writeSymbol(id, Consts.shkwd, yyline);
158+
} else {
159+
out.write(id);
160+
}
150161
}
151162

152163
{Number} {
@@ -158,11 +169,11 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
158169

159170
\$ ? \" {
160171
pushSpan(STRING, HtmlConsts.STRING_CLASS);
161-
out.write(yytext());
172+
out.write(htmlize(yytext()));
162173
}
163174
\$ ? \' {
164175
pushSpan(QSTRING, HtmlConsts.STRING_CLASS);
165-
out.write(yytext());
176+
out.write(htmlize(yytext()));
166177
}
167178
"#" {
168179
pushSpan(SCOMMENT, HtmlConsts.COMMENT_CLASS);
@@ -172,7 +183,7 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
172183
// Recognize here-documents. At least a subset of them.
173184
"<<" "-"? {WhspChar}* {Identifier} {WhspChar}* {
174185
String text = yytext();
175-
out.write(Util.htmlize(text));
186+
out.write(htmlize(text));
176187

177188
heredocStripLeadingTabs = (text.charAt(2) == '-');
178189
heredocStopWord = text.substring(heredocStripLeadingTabs ? 3 : 2).trim();
@@ -182,20 +193,24 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
182193
// Any sequence of more than two < characters should not start HEREDOC. Use
183194
// this rule to catch them before the HEREDOC rule.
184195
"<<" "<" + {
185-
out.write(Util.htmlize(yytext()));
196+
out.write(htmlize(yytext()));
186197
}
187198

199+
{Unary_op} |
200+
{Binary_op} {
201+
out.write(yytext());
202+
}
188203
}
189204

190205
<STRING> {
191-
\" {WhspChar}* \" { out.write(yytext()); }
192-
\" { out.write(yytext()); yypop(); }
193-
\\\\ | \\\" | \\\$ | \\` { out.write(yytext()); }
206+
\\[\"\$\`\\] |
207+
\" {WhspChar}* \" { out.write(htmlize(yytext())); }
208+
\" { out.write(htmlize(yytext())); yypop(); }
194209
\$\( {
195210
pushSpan(SUBSHELL, null);
196211
out.write(yytext());
197212
}
198-
` {
213+
[`] {
199214
pushSpan(BACKQUOTE, null);
200215
out.write(yytext());
201216
}
@@ -211,13 +226,13 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
211226
}
212227

213228
<QSTRING> {
214-
\' {WhspChar}* \' { out.write(yytext()); }
215-
\\' { out.write("\\'"); }
216-
\' { out.write(yytext()); yypop(); }
229+
\\[\'] |
230+
\' {WhspChar}* \' { out.write(htmlize(yytext())); }
231+
\' { out.write(htmlize(yytext())); yypop(); }
217232
}
218233

219234
<SCOMMENT> {
220-
{EOL} {
235+
{WhspChar}*{EOL} {
221236
yypop();
222237
startNewLine();
223238
}
@@ -247,16 +262,15 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
247262
if (isHeredocStopWord(line)) {
248263
yypop();
249264
}
250-
out.write(Util.htmlize(line));
265+
out.write(htmlize(line));
251266
}
252267

253268
{EOL} { startNewLine(); }
254269
}
255270

256271
<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
257272
/* Don't enter new state if special character is escaped. */
258-
\\` | \\\( | \\\) | \\\\ | \\\{ { out.write(yytext()); }
259-
\\\" | \\' | \\\$ | \\\# { out.write(yytext()); }
273+
\\[`\)\(\{\"\'\$\#\\] { out.write(htmlize(yytext())); }
260274

261275
/* $# should not start a comment. */
262276
"$#" { out.write(yytext()); }
@@ -288,11 +302,9 @@ File = {FNameChar}+ "." ([a-zA-Z]+)
288302

289303
{RelaxedMiddleFPath}
290304
{ out.write(Util.breadcrumbPath(urlPrefix+"path=",yytext(),'/'));}
291-
"&" {out.write( "&amp;");}
292-
"<" {out.write( "&lt;");}
293-
">" {out.write( "&gt;");}
294-
{WhiteSpace}{EOL} |
295-
{EOL} { startNewLine(); }
305+
306+
[&<>\'\"] { out.write(htmlize(yytext())); }
307+
{WhspChar}*{EOL} { startNewLine(); }
296308
{WhiteSpace} { out.write(yytext()); }
297309
[!-~] { out.write(yycharat(0)); }
298310
[^\n] { writeUnicodeChar(yycharat(0)); }
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*/
24+
25+
package org.opensolaris.opengrok.analysis.sh;
26+
27+
import java.io.BufferedReader;
28+
import java.io.InputStream;
29+
import java.io.InputStreamReader;
30+
import java.util.ArrayList;
31+
import java.util.List;
32+
import static org.junit.Assert.assertNotNull;
33+
import org.junit.Test;
34+
import static org.opensolaris.opengrok.util.CustomAssertions.assertSymbolStream;
35+
36+
/**
37+
* Tests the {@link ShSymbolTokenizer} class.
38+
*/
39+
public class ShSymbolTokenizerTest {
40+
41+
/**
42+
* Tests that tokenizing sample.sh yields the symbols listed in samplesymbols.txt.
43+
* @throws java.lang.Exception thrown on error
44+
*/
45+
@Test
46+
public void testShSymbolStream() throws Exception {
47+
InputStream shres = getClass().getClassLoader().getResourceAsStream(
48+
"org/opensolaris/opengrok/analysis/sh/sample.sh");
49+
assertNotNull("despite sample.sh as resource,", shres);
50+
InputStream symres = getClass().getClassLoader().getResourceAsStream(
51+
"org/opensolaris/opengrok/analysis/sh/samplesymbols.txt");
52+
assertNotNull("despite samplesymbols.txt as resource,", symres);
53+
54+
List<String> expectedSymbols = new ArrayList<>();
55+
try (BufferedReader wdsr = new BufferedReader(new InputStreamReader(
56+
symres, "UTF-8"))) {
57+
String line;
58+
while ((line = wdsr.readLine()) != null) {
59+
int hasho = line.indexOf('#');
60+
if (hasho != -1) line = line.substring(0, hasho);
61+
expectedSymbols.add(line.trim());
62+
}
63+
}
64+
65+
assertSymbolStream(ShSymbolTokenizer.class, shres, expectedSymbols);
66+
}
67+
}

test/org/opensolaris/opengrok/analysis/sh/sample.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,6 @@ for dir in $dirs; do
264264
cd - > /dev/null
265265
done
266266
exit 0
267+
268+
typeset -i vint
269+
vint=0xFFEF

0 commit comments

Comments
 (0)