Skip to content

Commit 33305de

Browse files
committed
Improve Tcl syntax support
1 parent d92b4a3 commit 33305de

File tree

7 files changed

+753
-51
lines changed

7 files changed

+753
-51
lines changed

src/org/opensolaris/opengrok/analysis/tcl/Tcl.lexh

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,83 @@
2020
/*
2121
* Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*
24+
* Copyright © 1993 The Regents of the University of California.
25+
* Copyright © 1994-1996 Sun Microsystems, Inc.
26+
* Copyright © 1995-1997 Roger E. Critchlow Jr.
2327
*/
2428

25-
Identifier = [\:\=a-zA-Z0-9_]+
26-
2729
Number = ([0-9]+\.[0-9]+|[0-9][0-9]*|"#" [boxBOX] [0-9a-fA-F]+)
30+
31+
/*
32+
* [1] Commands. ... Semi-colons and newlines are command separators unless
33+
* quoted as described below.
34+
*
35+
* [3] Words. Words of a command are separated by white space (except for
36+
* newlines, which are command separators).
37+
* [4] Double quotes. If the first character of a word is double-quote (``"'')
38+
* then the word is terminated by the next double-quote character.
39+
* [5] Braces. If the first character of a word is an open brace (``{'') then
40+
* the word is terminated by the matching close brace (``}'').
41+
* N.b. OpenGrok handles [4] and [5] as special matches distinct from {OrdinaryWord}.
42+
*
43+
* [9] Comments. If a hash character (``#'') appears at a point where Tcl is
44+
* expecting the first character of the first word of a command, then the hash
45+
* character and the characters that follow it, up through the next newline,
46+
* are treated as a comment and ignored. The comment character only has
47+
* significance when it appears at the beginning of a command.
48+
*
49+
* N.b. this "OrdinaryWord" is for OpenGrok's purpose of symbol tokenization
50+
* and deviates from the above definitions by treating backslash escapes as
51+
* word breaking and precluding some characters from starting words and mostly
52+
* the same from continuing words. E.g., hyphen is not allowed by OpenGrok to
53+
* start OrdinaryWord but can be present afterward.
54+
*/
55+
OrdinaryWord = [\S--\-,=#\"\}\{\]\[\)\(\\] [\S--#\"\}\{\]\[\)\(\\]*
56+
57+
/*
58+
* [7] Variable substitution.
59+
*
60+
* $name
61+
* Name is the name of a scalar variable; the name is a sequence of one or
62+
* more characters that are a letter, digit, underscore, or namespace
63+
* separators (two or more colons).
64+
*/
65+
Varsub1 = \$ {name_unit}+
66+
name_unit = ([\p{Letter}\p{Digit}_] | [:][:]+)
67+
/*
68+
* $name(index)
69+
* Name gives the name of an array variable and index gives the name of an
70+
* element within that array. Name must contain only letters, digits,
71+
* underscores, and namespace separators, and may be an empty string.
72+
*/
73+
Varsub2 = \$ {name_unit}* \( {name_unit}+ \)
74+
/*
75+
* ${name}
76+
* Name is the name of a scalar variable. It may contain any characters
77+
* whatsoever except for close braces.
78+
*/
79+
Varsub3 = \$\{ [^\}]+ \}
80+
81+
/*
82+
* [8] Backslash substitution.
83+
* Backslash plus a character, where ... in all cases but [for the characters]
84+
* described below, the backslash is dropped and the following character is
85+
* treated as an ordinary character and included in the word.
86+
*
87+
* Special cases:
88+
* a,f,b,n,r,t,v,backslash;
89+
* \<newline>whiteSpace;
90+
* \ooo The digits ooo (one, two, or three of them);
91+
* \xhh The hexadecimal digits hh .... Any number of hexadecimal digits may be
92+
* present;
93+
* \uhhhh The hexadecimal digits hhhh (one, two, three, or four of them)
94+
*
95+
* "Backslash substitution is not performed on words enclosed in braces, except
96+
* for backslash-newline as described above."
97+
*/
98+
Backslash_sub = [\\] ([afbnrtv\\] | \p{Number}{1,3} | [x][0-9a-fA-F]+ |
99+
[u][0-9a-fA-F]{1,4} | [[^]--[afbnrtv\n\p{Number}xu\\]])
100+
Backslash_nl = [\\] \n\s+
101+
102+
WordOperators = ("*" | "&&" | "||")

src/org/opensolaris/opengrok/analysis/tcl/TclSymbolTokenizer.lex

Lines changed: 99 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
package org.opensolaris.opengrok.analysis.tcl;
3030

31+
import java.io.IOException;
3132
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
3233
%%
3334
%public
@@ -40,36 +41,119 @@ super(in);
4041
%int
4142
%include CommonTokenizer.lexh
4243
%char
44+
%{
45+
private int braceCount;
4346

44-
%state STRING COMMENT SCOMMENT
47+
@Override
48+
public void reset() throws IOException {
49+
super.reset();
50+
braceCount = 0;
51+
}
52+
%}
53+
54+
%state STRING COMMENT SCOMMENT BRACES VARSUB2
4555

4656
%include Common.lexh
4757
%include Tcl.lexh
4858
%%
4959

5060
<YYINITIAL> {
51-
{Identifier} {String id = yytext();
52-
if (!Consts.kwd.contains(id)) {
53-
setAttribs(id, yychar, yychar + yylength());
54-
return yystate(); }
55-
}
56-
{Number} {}
57-
\" { yybegin(STRING); }
58-
"#" { yybegin(SCOMMENT); }
59-
60-
\\\" {}
61+
62+
[\{] {
63+
++braceCount;
64+
yypush(BRACES);
65+
}
66+
}
67+
68+
<YYINITIAL, BRACES> {
69+
{Number} {
70+
// noop
71+
}
72+
\" { yypush(STRING); }
73+
"#" { yypush(SCOMMENT); }
74+
{WordOperators} {
75+
// noop
76+
}
77+
}
78+
79+
<YYINITIAL, STRING, BRACES> {
80+
{Backslash_sub} {
81+
// noop
82+
}
83+
{Backslash_nl} {
84+
// noop
85+
}
86+
{Varsub1} {
    // The match is "$" followed by one or more name units; drop the
    // one-character sigil so only the variable name is considered.
    String name = yytext().substring(1);
    if (!Consts.kwd.contains(name)) {
        // Start offset is advanced by one to skip the leading "$".
        setAttribs(name, yychar + 1, yychar + yylength());
        return yystate();
    }
}
95+
{Varsub2} {
96+
yypush(VARSUB2);
97+
String capture = yytext();
98+
int lparen_i = capture.indexOf("(");
99+
String name1 = capture.substring(1, lparen_i);
100+
yypushback(capture.length() - lparen_i - 1);
101+
if (name1.length() > 0 && !Consts.kwd.contains(name1)) {
102+
setAttribs(name1, yychar + 1, yychar + lparen_i);
103+
return yystate();
104+
}
105+
}
106+
{Varsub3} {
107+
String capture = yytext();
108+
String name = capture.substring(2, capture.length() - 1);
109+
if (!Consts.kwd.contains(name)) {
110+
setAttribs(name, yychar + 2, yychar + yylength() - 1);
111+
return yystate();
112+
}
113+
}
114+
}
115+
116+
<VARSUB2> {
117+
{name_unit}+ {
118+
String name2 = yytext();
119+
yypop();
120+
if (!Consts.kwd.contains(name2)) {
121+
setAttribs(name2, yychar, yychar + yylength());
122+
return yystate();
123+
}
124+
}
125+
}
126+
127+
<YYINITIAL, BRACES> {
128+
{OrdinaryWord} {
129+
String id = yytext();
130+
if (!Consts.kwd.contains(id)) {
131+
setAttribs(id, yychar, yychar + yylength());
132+
return yystate();
133+
}
134+
}
61135
}
62136

63137
<STRING> {
64-
\\[\"\\] {}
65-
\" { yybegin(YYINITIAL); }
138+
\" { yypop(); }
139+
}
140+
141+
<BRACES> {
142+
[\}] {
143+
if (--braceCount == 0) {
144+
yypop();
145+
}
146+
}
147+
[\{] {
148+
++braceCount;
149+
}
66150
}
67151

68152
<SCOMMENT> {
69-
{EOL} { yybegin(YYINITIAL);}
153+
{EOL} { yypop(); }
70154
}
71155

72-
<YYINITIAL, STRING, COMMENT, SCOMMENT> {
156+
<YYINITIAL, STRING, COMMENT, SCOMMENT, BRACES> {
73157
{WhiteSpace} |
74158
[^] {}
75159
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, [email protected].
22+
*/
23+
24+
package org.opensolaris.opengrok.analysis.tcl;
25+
26+
import java.io.IOException;
27+
import org.opensolaris.opengrok.analysis.JFlexXref;
28+
29+
/**
30+
* Represents a container for Tcl-related utility methods.
31+
*/
32+
public class TclUtils {
33+
/**
34+
* Write {@code whsp} to the {@code xref} output -- if the whitespace does
35+
* not contain any LFs then the full String is written; otherwise, pre-LF
36+
* spaces are condensed as usual.
37+
* @param xref the target instance
38+
* @param whsp a defined whitespace capture
39+
* @throws java.io.IOException if an output error occurs
40+
*/
41+
public static void writeWhitespace(JFlexXref xref, String whsp)
42+
throws IOException {
43+
int i;
44+
if ((i = whsp.indexOf("\n")) == -1) {
45+
xref.out.write(whsp);
46+
} else {
47+
int numlf = 1, off = i + 1;
48+
while ((i = whsp.indexOf("\n", off)) != -1) {
49+
++numlf;
50+
off = i + 1;
51+
}
52+
while (numlf-- > 0) xref.startNewLine();
53+
if (off < whsp.length()) xref.out.write(whsp.substring(off));
54+
}
55+
}
56+
57+
/** private to enforce static */
58+
private TclUtils() {
59+
}
60+
}

0 commit comments

Comments
 (0)