Skip to content

Commit 2261682

Browse files
committed
Add more HaskellSymbolTokenizerTest, and fix exposed bugs
- Fix the symbol tokenizer so that fragments of numeric literals (e.g. the "xFF" in 0xFF or the "o7" in 0o7) are no longer reported as symbols. - Recognize "_" as a keyword.
1 parent acd9eb6 commit 2261682

File tree

6 files changed

+828
-9
lines changed

6 files changed

+828
-9
lines changed

src/org/opensolaris/opengrok/analysis/haskell/Consts.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
2223
*/
2324

2425
package org.opensolaris.opengrok.analysis.haskell;
@@ -57,5 +58,7 @@ public class Consts {
5758
kwd.add("then");
5859
kwd.add("type");
5960
kwd.add("where");
61+
62+
kwd.add("_"); // 2.4 Identifiers and Operators
6063
}
6164
}

src/org/opensolaris/opengrok/analysis/haskell/HaskellSymbolTokenizer.lex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ super(in);
4646
%char
4747

4848
Identifier = [a-zA-Z_] [a-zA-Z0-9_']*
49+
Number = (0[xX][0-9a-fA-F]+|0[oO][0-7]+|[0-9]+\.[0-9]+|[0-9][0-9_]*)([eE][+-]?[0-9]+)?
4950

5051
%state STRING CHAR COMMENT BCOMMENT
5152

@@ -59,6 +60,7 @@ Identifier = [a-zA-Z_] [a-zA-Z0-9_']*
5960
return yystate();
6061
}
6162
}
63+
{Number} {}
6264
\" { yybegin(STRING); }
6365
\' { yybegin(CHAR); }
6466
"--" { yybegin(COMMENT); }

src/org/opensolaris/opengrok/analysis/haskell/HaskellXref.lex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ import org.opensolaris.opengrok.web.Util;
5151
%}
5252

5353
Identifier = [a-zA-Z_] [a-zA-Z0-9_']*
54-
Number = (0[xX][0-9a-fA-F]+|[0-9]+\.[0-9]+|[0-9][0-9_]*)([eE][+-]?[0-9]+)?
54+
Number = (0[xX][0-9a-fA-F]+|0[oO][0-7]+|[0-9]+\.[0-9]+|[0-9][0-9_]*)([eE][+-]?[0-9]+)?
5555

5656
%state STRING CHAR COMMENT BCOMMENT
5757

test/org/opensolaris/opengrok/analysis/haskell/HaskellSymbolTokenizerTest.java

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,23 @@
2323
*/
2424
package org.opensolaris.opengrok.analysis.haskell;
2525

26+
import java.io.BufferedReader;
2627
import static org.junit.Assert.assertArrayEquals;
2728

2829
import java.io.IOException;
2930
import java.io.InputStream;
3031
import java.io.InputStreamReader;
3132
import java.io.Reader;
3233
import java.io.UnsupportedEncodingException;
34+
import java.util.ArrayList;
3335
import java.util.LinkedList;
3436
import java.util.List;
35-
import java.util.logging.Logger;
36-
3737
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
38+
import static org.junit.Assert.assertNotNull;
3839
import org.junit.Test;
3940
import org.opensolaris.opengrok.analysis.FileAnalyzer;
4041
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
41-
import org.opensolaris.opengrok.logger.LoggerFactory;
42+
import static org.opensolaris.opengrok.util.CustomAssertions.assertSymbolStream;
4243

4344
/**
4445
* Tests the {@link HaskellSymbolTokenizer} class.
@@ -47,8 +48,6 @@
4748
*/
4849
public class HaskellSymbolTokenizerTest {
4950

50-
private static final Logger LOGGER = LoggerFactory.getLogger(HaskellSymbolTokenizerTest.class);
51-
5251
private final FileAnalyzer analyzer;
5352

5453
public HaskellSymbolTokenizerTest() {
@@ -86,4 +85,32 @@ public void sampleTest() throws UnsupportedEncodingException {
8685
},
8786
termsFor);
8887
}
88+
89+
/**
90+
* Test sample2.hs v. sample2symbols.txt
91+
* @throws java.lang.Exception thrown on error
92+
*/
93+
@Test
94+
public void testHaskellSymbolStream() throws Exception {
95+
InputStream pyres = getClass().getClassLoader().getResourceAsStream(
96+
"org/opensolaris/opengrok/analysis/haskell/sample2.hs");
97+
assertNotNull("despite sample.py as resource,", pyres);
98+
InputStream symres = getClass().getClassLoader().getResourceAsStream(
99+
"org/opensolaris/opengrok/analysis/haskell/sample2symbols.txt");
100+
assertNotNull("despite samplesymbols.txt as resource,", symres);
101+
102+
List<String> expectedSymbols = new ArrayList<>();
103+
try (BufferedReader wdsr = new BufferedReader(new InputStreamReader(
104+
symres, "UTF-8"))) {
105+
String line;
106+
while ((line = wdsr.readLine()) != null) {
107+
int hasho = line.indexOf('#');
108+
if (hasho != -1) line = line.substring(0, hasho);
109+
expectedSymbols.add(line.trim());
110+
}
111+
}
112+
113+
assertSymbolStream(HaskellSymbolTokenizer.class, pyres,
114+
expectedSymbols);
115+
}
89116
}

test/org/opensolaris/opengrok/analysis/haskell/sample2_xref.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@
9090
<a class="l" name="87" href="#87">87</a> <b>where</b>
9191
<a class="l" name="88" href="#88">88</a> <a class="xf" name="readQuery"/><a href="/source/s?refs=readQuery" class="xf intelliWindow-symbol" data-definition-place="def">readQuery</a> = <a href="/source/s?defs=Select" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Select</a> [] [<a class="d intelliWindow-symbol" href="#rootNodeName" data-definition-place="defined-in-file">rootNodeName</a>] [] <a href="/source/s?defs=Nothing" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Nothing</a> <a class="d intelliWindow-symbol" href="#allRange" data-definition-place="defined-in-file">allRange</a>
9292
<a class="l" name="89" href="#89">89</a> <a class="d intelliWindow-symbol" href="#treeEntry" data-definition-place="defined-in-file">treeEntry</a> :: <a href="/source/s?defs=Tree" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Tree</a> <a href="/source/s?defs=SelectItem" class="intelliWindow-symbol" data-definition-place="undefined-in-file">SelectItem</a> -&gt; <a href="/source/s?defs=ReadRequest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">ReadRequest</a> -&gt; <a href="/source/s?defs=ReadRequest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">ReadRequest</a>
93-
<a class="hl" name="90" href="#90">90</a> <a class="xf" name="treeEntry"/><a href="/source/s?refs=treeEntry" class="xf intelliWindow-symbol" data-definition-place="def">treeEntry</a> (<a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> <a class="d intelliWindow-symbol" href="#fld" data-definition-place="defined-in-file">fld</a>@((<a href="/source/s?defs=fn" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fn</a>, <a href="/source/s?defs=_" class="intelliWindow-symbol" data-definition-place="undefined-in-file">_</a>),<a href="/source/s?defs=_" class="intelliWindow-symbol" data-definition-place="undefined-in-file">_</a>,<a href="/source/s?defs=alias" class="intelliWindow-symbol" data-definition-place="undefined-in-file">alias</a>,<a href="/source/s?defs=relationDetail" class="intelliWindow-symbol" data-definition-place="undefined-in-file">relationDetail</a>) <a href="/source/s?defs=fldForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fldForest</a>) (<a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a>, <a href="/source/s?defs=i" class="intelliWindow-symbol" data-definition-place="undefined-in-file">i</a>) <a href="/source/s?defs=rForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rForest</a>) =
93+
<a class="hl" name="90" href="#90">90</a> <a class="xf" name="treeEntry"/><a href="/source/s?refs=treeEntry" class="xf intelliWindow-symbol" data-definition-place="def">treeEntry</a> (<a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> <a class="d intelliWindow-symbol" href="#fld" data-definition-place="defined-in-file">fld</a>@((<a href="/source/s?defs=fn" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fn</a>, <b>_</b>),<b>_</b>,<a href="/source/s?defs=alias" class="intelliWindow-symbol" data-definition-place="undefined-in-file">alias</a>,<a href="/source/s?defs=relationDetail" class="intelliWindow-symbol" data-definition-place="undefined-in-file">relationDetail</a>) <a href="/source/s?defs=fldForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fldForest</a>) (<a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a>, <a href="/source/s?defs=i" class="intelliWindow-symbol" data-definition-place="undefined-in-file">i</a>) <a href="/source/s?defs=rForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rForest</a>) =
9494
<a class="l" name="91" href="#91">91</a> <b>case</b> <a href="/source/s?defs=fldForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fldForest</a> <b>of</b>
9595
<a class="l" name="92" href="#92">92</a> [] -&gt; <a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a> {<a href="/source/s?defs=select" class="intelliWindow-symbol" data-definition-place="undefined-in-file">select</a>=<a class="d intelliWindow-symbol" href="#fld" data-definition-place="defined-in-file">fld</a>:<a href="/source/s?defs=select" class="intelliWindow-symbol" data-definition-place="undefined-in-file">select</a> <a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a>}, <a href="/source/s?defs=i" class="intelliWindow-symbol" data-definition-place="undefined-in-file">i</a>) <a href="/source/s?defs=rForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rForest</a>
96-
<a class="l" name="93" href="#93">93</a> <a href="/source/s?defs=_" class="intelliWindow-symbol" data-definition-place="undefined-in-file">_</a> -&gt; <a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a>, <a href="/source/s?defs=i" class="intelliWindow-symbol" data-definition-place="undefined-in-file">i</a>) <a class="d intelliWindow-symbol" href="#newForest" data-definition-place="defined-in-file">newForest</a>
96+
<a class="l" name="93" href="#93">93</a> <b>_</b> -&gt; <a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=q" class="intelliWindow-symbol" data-definition-place="undefined-in-file">q</a>, <a href="/source/s?defs=i" class="intelliWindow-symbol" data-definition-place="undefined-in-file">i</a>) <a class="d intelliWindow-symbol" href="#newForest" data-definition-place="defined-in-file">newForest</a>
9797
<a class="l" name="94" href="#94">94</a> <b>where</b>
9898
<a class="l" name="95" href="#95">95</a> <a class="xf" name="newForest"/><a href="/source/s?refs=newForest" class="xf intelliWindow-symbol" data-definition-place="def">newForest</a> =
9999
<a class="l" name="96" href="#96">96</a> <a href="/source/s?defs=foldr" class="intelliWindow-symbol" data-definition-place="undefined-in-file">foldr</a> <a class="d intelliWindow-symbol" href="#treeEntry" data-definition-place="defined-in-file">treeEntry</a> (<a href="/source/s?defs=Node" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Node</a> (<a href="/source/s?defs=Select" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Select</a> [] [<a href="/source/s?defs=fn" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fn</a>] [] <a href="/source/s?defs=Nothing" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Nothing</a> <a class="d intelliWindow-symbol" href="#allRange" data-definition-place="defined-in-file">allRange</a>, (<a href="/source/s?defs=fn" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fn</a>, <a href="/source/s?defs=Nothing" class="intelliWindow-symbol" data-definition-place="undefined-in-file">Nothing</a>, <a href="/source/s?defs=alias" class="intelliWindow-symbol" data-definition-place="undefined-in-file">alias</a>, <a href="/source/s?defs=relationDetail" class="intelliWindow-symbol" data-definition-place="undefined-in-file">relationDetail</a>)) []) <a href="/source/s?defs=fldForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">fldForest</a>:<a href="/source/s?defs=rForest" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rForest</a>
@@ -260,7 +260,7 @@
260260
<a class="l" name="257" href="#257">257</a> $ <a href="/source/s?defs=showErrorMessages" class="intelliWindow-symbol" data-definition-place="undefined-in-file">showErrorMessages</a> <span class="s">"or"</span> <span class="s">"unknown parse error"</span> <span class="s">"expecting"</span> <span class="s">"unexpected"</span> <span class="s">"end of input"</span> (<a href="/source/s?defs=errorMessages" class="intelliWindow-symbol" data-definition-place="undefined-in-file">errorMessages</a> <a href="/source/s?defs=e" class="intelliWindow-symbol" data-definition-place="undefined-in-file">e</a>)
261261
<a class="l" name="258" href="#258">258</a>
262262
<a class="l" name="259" href="#259">259</a><a class="d intelliWindow-symbol" href="#allRange" data-definition-place="defined-in-file">allRange</a> :: <a href="/source/s?defs=NonnegRange" class="intelliWindow-symbol" data-definition-place="undefined-in-file">NonnegRange</a>
263-
<a class="hl" name="260" href="#260">260</a><a class="xf" name="allRange"/><a href="/source/s?refs=allRange" class="xf intelliWindow-symbol" data-definition-place="def">allRange</a> = <a href="/source/s?defs=rangeGeq" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rangeGeq</a> <span class="n">0</span> + <span class="n">0xFF</span> - <span class="n">0XFF</span> + <span class="n">0</span><a href="/source/s?defs=o7" class="intelliWindow-symbol" data-definition-place="undefined-in-file">o7</a> - <span class="n">0</span><a href="/source/s?defs=O7" class="intelliWindow-symbol" data-definition-place="undefined-in-file">O7</a> + <span class="n">1.0e2</span> - <span class="n">1.0E2</span> + <span class="n">1e2</span> - <span class="n">1E2</span>
263+
<a class="hl" name="260" href="#260">260</a><a class="xf" name="allRange"/><a href="/source/s?refs=allRange" class="xf intelliWindow-symbol" data-definition-place="def">allRange</a> = <a href="/source/s?defs=rangeGeq" class="intelliWindow-symbol" data-definition-place="undefined-in-file">rangeGeq</a> <span class="n">0</span> + <span class="n">0xFF</span> - <span class="n">0XFF</span> + <span class="n">0o7</span> - <span class="n">0O7</span> + <span class="n">1.0e2</span> - <span class="n">1.0E2</span> + <span class="n">1e2</span> - <span class="n">1E2</span>
264264
<a class="l" name="261" href="#261">261</a><span class="c">{- comment {- comment -}</span>
265265
<a class="l" name="262" href="#262">262</a><a href="/source/s?defs=comment" class="intelliWindow-symbol" data-definition-place="undefined-in-file">comment</a>
266266
<a class="l" name="263" href="#263">263</a>-}

0 commit comments

Comments
 (0)