Skip to content

Commit bd3f549

Browse files
authored
Merge pull request #1943 from idodeclare/feature/tcl_tests
Feature/tcl tests
2 parents 822cda1 + e18407f commit bd3f549

File tree

13 files changed

+1452
-45
lines changed

13 files changed

+1452
-45
lines changed

opengrok-indexer/pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,13 @@ Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
200200
<exclude>*.java</exclude>
201201
</excludes>
202202
</testResource>
203+
<testResource>
204+
<targetPath>org/opensolaris/opengrok/analysis/tcl/</targetPath>
205+
<directory>../test/org/opensolaris/opengrok/analysis/tcl/</directory>
206+
<excludes>
207+
<exclude>*.java</exclude>
208+
</excludes>
209+
</testResource>
203210
<testResource>
204211
<targetPath>org/opensolaris/opengrok/analysis/haskell/</targetPath>
205212
<directory>../test/org/opensolaris/opengrok/analysis/haskell/</directory>
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*
24+
* Copyright © 1993 The Regents of the University of California.
25+
* Copyright © 1994-1996 Sun Microsystems, Inc.
26+
* Copyright © 1995-1997 Roger E. Critchlow Jr.
27+
*/
28+
29+
Number = ([0-9]+\.[0-9]+|[0-9][0-9]*|"#" [boxBOX] [0-9a-fA-F]+)
30+
31+
/*
32+
* [1] Commands. ... Semi-colons and newlines are command separators unless
33+
* quoted as described below.
34+
*
35+
* [3] Words. Words of a command are separated by white space (except for
36+
* newlines, which are command separators).
37+
* [4] Double quotes. If the first character of a word is double-quote (``"'')
38+
* then the word is terminated by the next double-quote character.
39+
* [5] Braces. If the first character of a word is an open brace (``{'') then
40+
* the word is terminated by the matching close brace (``}'').
41+
* N.b. OpenGrok handles [4] and [5] as special matches distinct from {Word}.
42+
*
43+
* [9] Comments. If a hash character (``#'') appears at a point where Tcl is
44+
* expecting the first character of the first word of a command, then the hash
45+
* character and the characters that follow it, up through the next newline,
46+
* are treated as a comment and ignored. The comment character only has
47+
* significance when it appears at the beginning of a command.
48+
*
49+
* N.b. this "OrdinaryWord" is for OpenGrok's purpose of symbol tokenization
50+
* and deviates from the above definitions by treating backslash escapes as
51+
* word breaking and precluding some characters from starting words and mostly
52+
* the same from continuing words. E.g., hyphen is not allowed by OpenGrok to
53+
* start OrdinaryWord but can be present afterward.
54+
*/
55+
OrdinaryWord = [\S--\-,=#\"\}\{\]\[\)\(\\] [\S--#\"\}\{\]\[\)\(\\]*
56+
57+
/*
58+
* [7] Variable substitution.
59+
*
60+
* $name
61+
* Name is the name of a scalar variable; the name is a sequence of one or
62+
* more characters that are a letter, digit, underscore, or namespace
63+
* separators (two or more colons).
64+
*/
65+
Varsub1 = \$ {name_unit}+
66+
name_unit = ([\p{Letter}\p{Digit}_] | [:][:]+)
67+
/*
68+
* $name(index)
69+
* Name gives the name of an array variable and index gives the name of an
70+
* element within that array. Name must contain only letters, digits,
71+
* underscores, and namespace separators, and may be an empty string.
72+
*/
73+
Varsub2 = \$ {name_unit}* \( {name_unit}+ \)
74+
/*
75+
* ${name}
76+
* Name is the name of a scalar variable. It may contain any characters
77+
* whatsoever except for close braces.
78+
*/
79+
Varsub3 = \$\{ [^\}]+ \}
80+
81+
/*
82+
* [8] Backslash substitution.
83+
* Backslash plus a character, where ... in all cases but [for the characters]
84+
* described below, the backslash is dropped and the following character is
85+
* treated as an ordinary character and included in the word.
86+
*
87+
* Special cases:
88+
* a,f,b,n,r,t,v,backslash;
89+
* \<newline>whiteSpace;
90+
* \ooo The digits ooo (one, two, or three of them);
91+
* \xhh The hexadecimal digits hh .... Any number of hexadecimal digits may be
92+
* present;
93+
* \uhhhh The hexadecimal digits hhhh (one, two, three, or four of them)
94+
*
95+
* "Backslash substitution is not performed on words enclosed in braces, except
96+
* for backslash-newline as described above."
97+
*/
98+
Backslash_sub = [\\] ([afbnrtv\\] | \p{Number}{1,3} | [x][0-9a-fA-F]+ |
99+
[u][0-9a-fA-F]{1,4} | [[^]--[afbnrtv\n\p{Number}xu\\]])
100+
Backslash_nl = [\\] \n\s+
101+
102+
WordOperators = ("*" | "&&" | "||")

src/org/opensolaris/opengrok/analysis/tcl/TclSymbolTokenizer.lex

Lines changed: 101 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,9 @@
2727
*/
2828

2929
package org.opensolaris.opengrok.analysis.tcl;
30+
3031
import java.io.IOException;
31-
import java.io.Reader;
3232
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
33-
3433
%%
3534
%public
3635
%class TclSymbolTokenizer
@@ -42,32 +41,119 @@ super(in);
4241
%int
4342
%include CommonTokenizer.lexh
4443
%char
44+
%{
45+
private int braceCount;
4546

46-
Identifier = [\:\=a-zA-Z0-9_]+
47+
@Override
48+
public void reset() throws IOException {
49+
super.reset();
50+
braceCount = 0;
51+
}
52+
%}
4753

48-
%state STRING COMMENT SCOMMENT
54+
%state STRING COMMENT SCOMMENT BRACES VARSUB2
4955

56+
%include Common.lexh
57+
%include Tcl.lexh
5058
%%
5159

5260
<YYINITIAL> {
53-
{Identifier} {String id = yytext();
54-
if (!Consts.kwd.contains(id)) {
55-
setAttribs(id, yychar, yychar + yylength());
56-
return yystate(); }
57-
}
58-
\" { yybegin(STRING); }
59-
"#" { yybegin(SCOMMENT); }
61+
62+
[\{] {
63+
++braceCount;
64+
yypush(BRACES);
65+
}
66+
}
67+
68+
<YYINITIAL, BRACES> {
69+
{Number} {
70+
// noop
71+
}
72+
\" { yypush(STRING); }
73+
"#" { yypush(SCOMMENT); }
74+
{WordOperators} {
75+
// noop
76+
}
77+
}
78+
79+
<YYINITIAL, STRING, BRACES> {
80+
{Backslash_sub} {
81+
// noop
82+
}
83+
{Backslash_nl} {
84+
// noop
85+
}
86+
{Varsub1}    {
    // The match is the "$" sigil followed by the variable name; only the
    // name is of interest as a symbol, so drop the first character.
    String name = yytext().substring(1);
    if (!Consts.kwd.contains(name)) {
        // Offsets cover just the name: start one past the sigil and end at
        // the end of the match.
        setAttribs(name, yychar + 1, yychar + yylength());
        return yystate();
    }
}
95+
{Varsub2} {
96+
yypush(VARSUB2);
97+
String capture = yytext();
98+
int lparen_i = capture.indexOf("(");
99+
String name1 = capture.substring(1, lparen_i);
100+
yypushback(capture.length() - lparen_i - 1);
101+
if (name1.length() > 0 && !Consts.kwd.contains(name1)) {
102+
setAttribs(name1, yychar + 1, yychar + lparen_i);
103+
return yystate();
104+
}
105+
}
106+
{Varsub3} {
107+
String capture = yytext();
108+
String name = capture.substring(2, capture.length() - 1);
109+
if (!Consts.kwd.contains(name)) {
110+
setAttribs(name, yychar + 2, yychar + yylength() - 1);
111+
return yystate();
112+
}
113+
}
114+
}
115+
116+
<VARSUB2> {
117+
{name_unit}+ {
118+
String name2 = yytext();
119+
yypop();
120+
if (!Consts.kwd.contains(name2)) {
121+
setAttribs(name2, yychar, yychar + yylength());
122+
return yystate();
123+
}
124+
}
125+
}
126+
127+
<YYINITIAL, BRACES> {
128+
{OrdinaryWord} {
129+
String id = yytext();
130+
if (!Consts.kwd.contains(id)) {
131+
setAttribs(id, yychar, yychar + yylength());
132+
return yystate();
133+
}
134+
}
60135
}
61136

62137
<STRING> {
63-
\" { yybegin(YYINITIAL); }
64-
\\\\ | \\\" {}
138+
\" { yypop(); }
139+
}
140+
141+
<BRACES> {
142+
[\}] {
143+
if (--braceCount == 0) {
144+
yypop();
145+
}
146+
}
147+
[\{] {
148+
++braceCount;
149+
}
65150
}
66151

67152
<SCOMMENT> {
68-
\n { yybegin(YYINITIAL);}
153+
{EOL} { yypop(); }
69154
}
70155

71-
<YYINITIAL, STRING, COMMENT, SCOMMENT> {
156+
<YYINITIAL, STRING, COMMENT, SCOMMENT, BRACES> {
157+
{WhiteSpace} |
72158
[^] {}
73159
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, [email protected].
22+
*/
23+
24+
package org.opensolaris.opengrok.analysis.tcl;
25+
26+
import java.io.IOException;
27+
import org.opensolaris.opengrok.analysis.JFlexXref;
28+
29+
/**
30+
* Represents a container for Tcl-related utility methods.
31+
*/
32+
public class TclUtils {
33+
/**
34+
* Write {@code whsp} to the {@code xref} output -- if the whitespace does
35+
* not contain any LFs then the full String is written; otherwise, pre-LF
36+
* spaces are condensed as usual.
37+
* @param xref the target instance
38+
* @param whsp a defined whitespace capture
39+
* @throws java.io.IOException if an output error occurs
40+
*/
41+
public static void writeWhitespace(JFlexXref xref, String whsp)
42+
throws IOException {
43+
int i;
44+
if ((i = whsp.indexOf("\n")) == -1) {
45+
xref.out.write(whsp);
46+
} else {
47+
int numlf = 1, off = i + 1;
48+
while ((i = whsp.indexOf("\n", off)) != -1) {
49+
++numlf;
50+
off = i + 1;
51+
}
52+
while (numlf-- > 0) xref.startNewLine();
53+
if (off < whsp.length()) xref.out.write(whsp.substring(off));
54+
}
55+
}
56+
57+
/** private to enforce static */
58+
private TclUtils() {
59+
}
60+
}

0 commit comments

Comments
 (0)