Skip to content

Commit b825a55

Browse files
committed
Add ErlangSymbolTokenizerTest, and fix exposed bugs
- ErlangSymbolTokenizer should recognize and skip numbers.
- Erlang module keywords and syntax should be recognized.
1 parent 6a64198 commit b825a55

File tree

8 files changed: +266 −53 lines changed

src/org/opensolaris/opengrok/analysis/erlang/Consts.java

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
2223
*/
2324

2425
package org.opensolaris.opengrok.analysis.erlang;
@@ -31,34 +32,52 @@
3132
*/
3233
public class Consts{
3334
public static final Set<String> kwd = new HashSet<String>() ;
35+
public static final Set<String> modules_kwd = new HashSet<String>() ;
3436
static {
35-
kwd.add("after");
36-
kwd.add("begin");
37-
kwd.add("case");
38-
kwd.add("try");
39-
kwd.add("cond");
40-
kwd.add("catch");
41-
kwd.add("andalso");
42-
kwd.add("orelse");
43-
kwd.add("end");
44-
kwd.add("fun");
45-
kwd.add("if");
46-
kwd.add("let");
47-
kwd.add("of");
48-
kwd.add("query");
49-
kwd.add("receive");
50-
kwd.add("when");
51-
kwd.add("bnot");
52-
kwd.add("not");
53-
kwd.add("div");
54-
kwd.add("rem");
55-
kwd.add("band");
56-
kwd.add("and");
57-
kwd.add("bor");
58-
kwd.add("bxor");
59-
kwd.add("bsl");
60-
kwd.add("bsr");
61-
kwd.add("or");
62-
kwd.add("xor");
37+
kwd.add("after"); // Ref. 9.1 "1.5 Reserved Words"
38+
kwd.add("and"); // Ref. 9.1 "1.5 Reserved Words"
39+
kwd.add("andalso"); // Ref. 9.1 "1.5 Reserved Words"
40+
kwd.add("band"); // Ref. 9.1 "1.5 Reserved Words"
41+
kwd.add("begin"); // Ref. 9.1 "1.5 Reserved Words"
42+
kwd.add("bnot"); // Ref. 9.1 "1.5 Reserved Words"
43+
kwd.add("bor"); // Ref. 9.1 "1.5 Reserved Words"
44+
kwd.add("bsl"); // Ref. 9.1 "1.5 Reserved Words"
45+
kwd.add("bsr"); // Ref. 9.1 "1.5 Reserved Words"
46+
kwd.add("bxor"); // Ref. 9.1 "1.5 Reserved Words"
47+
kwd.add("case"); // Ref. 9.1 "1.5 Reserved Words"
48+
kwd.add("catch"); // Ref. 9.1 "1.5 Reserved Words"
49+
kwd.add("cond"); // Ref. 9.1 "1.5 Reserved Words"
50+
kwd.add("div"); // Ref. 9.1 "1.5 Reserved Words"
51+
kwd.add("end"); // Ref. 9.1 "1.5 Reserved Words"
52+
kwd.add("fun"); // Ref. 9.1 "1.5 Reserved Words"
53+
kwd.add("if"); // Ref. 9.1 "1.5 Reserved Words"
54+
kwd.add("let"); // Ref. 9.1 "1.5 Reserved Words"
55+
kwd.add("not"); // Ref. 9.1 "1.5 Reserved Words"
56+
kwd.add("of"); // Ref. 9.1 "1.5 Reserved Words"
57+
kwd.add("or"); // Ref. 9.1 "1.5 Reserved Words"
58+
kwd.add("orelse"); // Ref. 9.1 "1.5 Reserved Words"
59+
kwd.add("receive"); // Ref. 9.1 "1.5 Reserved Words"
60+
kwd.add("rem"); // Ref. 9.1 "1.5 Reserved Words"
61+
kwd.add("try"); // Ref. 9.1 "1.5 Reserved Words"
62+
kwd.add("when"); // Ref. 9.1 "1.5 Reserved Words"
63+
kwd.add("xor"); // Ref. 9.1 "1.5 Reserved Words"
64+
65+
kwd.add("query"); // pre-existing here of unknown provenance
66+
67+
modules_kwd.add("behavior"); // Ref. 9.1 "5.2 Module Attributes"
68+
modules_kwd.add("behaviour"); // Ref. 9.1 "5.2 Module Attributes"
69+
modules_kwd.add("callback"); // Ref. 9.1 "5.2 Module Attributes"
70+
modules_kwd.add("compile"); // Ref. 9.1 "5.2 Module Attributes"
71+
modules_kwd.add("define"); // Ref. 9.1 "5.2 Module Attributes"
72+
modules_kwd.add("export"); // Ref. 9.1 "5.2 Module Attributes"
73+
modules_kwd.add("file"); // Ref. 9.1 "5.2 Module Attributes"
74+
modules_kwd.add("import"); // Ref. 9.1 "5.2 Module Attributes"
75+
modules_kwd.add("include"); // Ref. 9.1 "5.2 Module Attributes"
76+
modules_kwd.add("module"); // Ref. 9.1 "5.2 Module Attributes"
77+
modules_kwd.add("on_load"); // Ref. 9.1 "5.2 Module Attributes"
78+
modules_kwd.add("record"); // Ref. 9.1 "5.2 Module Attributes"
79+
modules_kwd.add("spec"); // Ref. 9.1 "5.2 Module Attributes"
80+
modules_kwd.add("type"); // Ref. 9.1 "5.2 Module Attributes"
81+
modules_kwd.add("vsn"); // Ref. 9.1 "5.2 Module Attributes"
6382
}
6483
}
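Erlang.lexh (new file; name inferred from the "%include Erlang.lexh" directives added elsewhere in this commit)

Lines changed: 30 additions & 0 deletions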
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*/
24+
25+
Identifier = [a-zA-Z_] [a-zA-Z0-9_@]*
26+
ErlInt = ([12][0-9]|3[0-6]|[1-9])#[0-9]+
27+
Number = (0[xX][0-9a-fA-F]+|[0-9]+\.[0-9]+|[0-9]+)(([eE][+-]?[0-9]+)?[loxbLOXBjJ]*)?
28+
29+
ErlangWhspChar = ({WhspChar} | [\u{B}])
30+
ErlangWhiteSpace = {ErlangWhspChar}+ | {WhiteSpace}

src/org/opensolaris/opengrok/analysis/erlang/ErlangSymbolTokenizer.lex

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,37 @@ super(in);
4343
%include CommonTokenizer.lexh
4444
%char
4545

46-
Identifier = [A-Z_] [a-zA-Z0-9_@]*
47-
4846
%state STRING COMMENT QATOM
4947

48+
%include Erlang.lexh
5049
%%
5150

5251
<YYINITIAL> {
53-
{Identifier} {String id = yytext();
54-
if(!Consts.kwd.contains(id)){
52+
53+
"?" {Identifier} { // Macros
54+
}
55+
56+
{Identifier} {
57+
String id = yytext();
58+
if (id.length() > 1 && !Consts.kwd.contains(id)) {
5559
setAttribs(id, yychar, yychar + yylength());
56-
return yystate(); }
57-
}
60+
return yystate();
61+
}
62+
}
63+
64+
^"-" {Identifier} {
65+
String capture = yytext();
66+
String punc = capture.substring(0, 1);
67+
String id = capture.substring(1);
68+
if (!Consts.modules_kwd.contains(id)) {
69+
setAttribs(id, yychar + 1, yychar + yylength());
70+
return yystate();
71+
}
72+
}
73+
74+
{ErlInt} {}
75+
{Number} {}
76+
5877
\" { yybegin(STRING); }
5978
\' { yybegin(QATOM); }
6079
"%" { yybegin(COMMENT); }

src/org/opensolaris/opengrok/analysis/erlang/ErlangXref.lex

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,26 +49,16 @@ import org.opensolaris.opengrok.web.Util;
4949
protected void setLineNumber(int x) { yyline = x; }
5050
%}
5151

52-
ErlangWhspChar = ({WhspChar} | [\u{B}])
53-
ErlangWhiteSpace = {ErlangWhspChar}+ | {WhiteSpace}
54-
Identifier = [a-zA-Z_] [a-zA-Z0-9_@]+
55-
5652
IncludeDirective = (include|include_lib)
57-
//PPDirective = (define|undef|ifdef|else|endif)
58-
//Directive = (module|author|compile|export|import)
59-
60-
// ErlChar = \$ASCII
61-
ErlInt = ([12][0-9]|3[0-6]|[1-9])#[0-9]+
6253

6354
File = [a-zA-Z]{FNameChar}* "." ("erl"|"hrl"|"app"|"asn"|"yrl"|"asn1"|"xml"|"html")
6455

65-
Number = (0[xX][0-9a-fA-F]+|[0-9]+\.[0-9]+|[0-9]+)(([eE][+-]?[0-9]+)?[loxbLOXBjJ]*)?
66-
6756
%state STRING COMMENT QATOM
6857

6958
%include Common.lexh
7059
%include CommonURI.lexh
7160
%include CommonPath.lexh
61+
%include Erlang.lexh
7262
%%
7363
<YYINITIAL>{
7464

@@ -78,7 +68,13 @@ Number = (0[xX][0-9a-fA-F]+|[0-9]+\.[0-9]+|[0-9]+)(([eE][+-]?[0-9]+)?[loxbLOXBjJ
7868

7969
{Identifier} {
8070
String id = yytext();
81-
writeSymbol(id, Consts.kwd, yyline);
71+
// N.b. for historical reasons, ErlangXref does not link identifiers of
72+
// length=1
73+
if (id.length() > 1) {
74+
writeSymbol(id, Consts.kwd, yyline);
75+
} else {
76+
out.write(id);
77+
}
8278
}
8379

8480
"-" {IncludeDirective} "(" ({File}|{FPath}) ")." {
@@ -94,6 +90,14 @@ Number = (0[xX][0-9a-fA-F]+|[0-9]+\.[0-9]+|[0-9]+)(([eE][+-]?[0-9]+)?[loxbLOXBjJ
9490
out.write("&gt;");
9591
}
9692

93+
^"-" {Identifier} {
94+
String capture = yytext();
95+
String punc = capture.substring(0, 1);
96+
String id = capture.substring(1);
97+
out.write(punc);
98+
writeSymbol(id, Consts.modules_kwd, yyline);
99+
}
100+
97101
{ErlInt} { out.write("<span class=\"n\">"); out.write(yytext()); out.write("</span>"); }
98102
{Number} { out.write("<span class=\"n\">"); out.write(yytext()); out.write("</span>"); }
99103

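test/org/opensolaris/opengrok/analysis/erlang/ErlangSymbolTokenizerTest.java (new file; path inferred from the package and class declarations in the hunk that follows)

Lines changed: 68 additions & 0 deletions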
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
23+
*/
24+
25+
package org.opensolaris.opengrok.analysis.erlang;
26+
27+
import java.io.BufferedReader;
28+
import java.io.InputStream;
29+
import java.io.InputStreamReader;
30+
import java.util.ArrayList;
31+
import java.util.List;
32+
import static org.junit.Assert.assertNotNull;
33+
import org.junit.Test;
34+
import static org.opensolaris.opengrok.util.CustomAssertions.assertSymbolStream;
35+
36+
/**
37+
* Tests the {@link ErlangSymbolTokenizer} class.
38+
*/
39+
public class ErlangSymbolTokenizerTest {
40+
41+
/**
42+
* Test sample.erl v. samplesymbols.txt
43+
* @throws java.lang.Exception thrown on error
44+
*/
45+
@Test
46+
public void testErlangSymbolStream() throws Exception {
47+
InputStream erlres = getClass().getClassLoader().getResourceAsStream(
48+
"org/opensolaris/opengrok/analysis/erlang/sample.erl");
49+
assertNotNull("despite sample.erl as resource,", erlres);
50+
InputStream symres = getClass().getClassLoader().getResourceAsStream(
51+
"org/opensolaris/opengrok/analysis/erlang/samplesymbols.txt");
52+
assertNotNull("despite samplesymbols.txt as resource,", symres);
53+
54+
List<String> expectedSymbols = new ArrayList<>();
55+
try (BufferedReader wdsr = new BufferedReader(new InputStreamReader(
56+
symres, "UTF-8"))) {
57+
String line;
58+
while ((line = wdsr.readLine()) != null) {
59+
int hasho = line.indexOf('#');
60+
if (hasho != -1) line = line.substring(0, hasho);
61+
expectedSymbols.add(line.trim());
62+
}
63+
}
64+
65+
assertSymbolStream(ErlangSymbolTokenizer.class, erlres,
66+
expectedSymbols);
67+
}
68+
}

test/org/opensolaris/opengrok/analysis/erlang/sample.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ loop(LSocket, Transport, ConnsSup) ->
4040
%% a little for the situation to resolve itself.
4141
{error, emfile} ->
4242
error_logger:warning_msg("Ranch acceptor reducing accept rate: out of file descriptors~n"),
43-
receive after 100 -> ok end;
43+
receive after 0xFF -> ok end;
4444
%% We want to crash if the listening socket got closed.
4545
{error, Reason} when Reason =/= closed ->
4646
ok

0 commit comments

Comments
 (0)