Skip to content

Commit 4e390c5

Browse files
authored
Merge pull request #1916 from idodeclare/feature/rust_tests
Feature/rust tests
2 parents 8a86b9c + ac9a81f commit 4e390c5

File tree

14 files changed

+1065
-66
lines changed

14 files changed

+1065
-66
lines changed

opengrok-indexer/pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,13 @@ Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
137137
<exclude>*.java</exclude>
138138
</excludes>
139139
</testResource>
140+
<testResource>
141+
<targetPath>org/opensolaris/opengrok/analysis/rust/</targetPath>
142+
<directory>../test/org/opensolaris/opengrok/analysis/rust/</directory>
143+
<excludes>
144+
<exclude>*.java</exclude>
145+
</excludes>
146+
</testResource>
140147
<testResource>
141148
<targetPath>org/opensolaris/opengrok/analysis/haskell/</targetPath>
142149
<directory>../test/org/opensolaris/opengrok/analysis/haskell/</directory>

src/org/opensolaris/opengrok/analysis/Ctags.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,18 @@ private void initialize() throws IOException {
210210

211211
command.add("--langdef=rust");
212212
command.add("--langmap=rust:+.rs");
213-
//command.add("--regex-rust=/^[[:space:]]*(#\\[[^\\]]\\][[:space:]]*)*(pub[[:space:]]+)?(extern[[:space:]]+)?(\"[^\"]+\"[[:space:]]+)?(unsafe[[:space:]]+)?fn[[:space:]]+([[:alnum:]_]+)/\\6/h,functions,function definitions/");
214-
command.add("--regex-rust=/^[[:space:]]*(#\\[[^]]+\\][[:space:]]*)*(pub[[:space:]]+)?(extern[[:space:]]+)?(\\\"[^\\\"]+\\\"[[:space:]]+)?(unsafe[[:space:]]+)?fn[[:space:]]+([[:alnum:]_]+)/\\6/h,functions,function definitions/");
215-
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?type[[:space:]]+([[:alnum:]_]+)/\\2/T,types,type definitions/");
216-
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?enum[[:space:]]+([[:alnum:]_]+)/\\2/g,enum,enumeration names/");
217-
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?struct[[:space:]]+([[:alnum:]_]+)/\\2/S,structure names/");
218-
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?mod[[:space:]]+([[:alnum:]_]+)/\\2/N,modules,module names/");
213+
if (!env.isUniversalCtags()) {
214+
command.add("--regex-rust=/^[[:space:]]*(#\\[[^]]+\\][[:space:]]*)*(pub[[:space:]]+)?(extern[[:space:]]+)?(\\\"[^\\\"]+\\\"[[:space:]]+)?(unsafe[[:space:]]+)?fn[[:space:]]+([[:alnum:]_]+)/\\6/h,functions,function definitions/");
215+
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?type[[:space:]]+([[:alnum:]_]+)/\\2/T,types,type definitions/");
216+
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?enum[[:space:]]+([[:alnum:]_]+)/\\2/g,enum,enumeration names/");
217+
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?struct[[:space:]]+([[:alnum:]_]+)/\\2/S,structure names/");
218+
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?mod[[:space:]]+([[:alnum:]_]+)/\\2/N,modules,module names/");
219+
command.add("--regex-rust=/^[[:space:]]*macro_rules![[:space:]]+([[:alnum:]_]+)/\\1/d,macros,macro definitions/");
220+
}
221+
// The following are not supported yet in Universal Ctags b13cb551
219222
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?(static|const)[[:space:]]+(mut[[:space:]]+)?([[:alnum:]_]+)/\\4/C,consts,static constants/");
220-
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?(unsafe[[:space:]]+)?trait[[:space:]]+([[:alnum:]_]+)/\\3/r,traits,traits/");
221223
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?(unsafe[[:space:]]+)?impl([[:space:]\n]*<[^>]*>)?[[:space:]]+(([[:alnum:]_:]+)[[:space:]]*(<[^>]*>)?[[:space:]]+(for)[[:space:]]+)?([[:alnum:]_]+)/\\5 \\7 \\8/I,impls,trait implementations/");
222-
command.add("--regex-rust=/^[[:space:]]*macro_rules![[:space:]]+([[:alnum:]_]+)/\\1/d,macros,macro definitions/");
224+
command.add("--regex-rust=/^[[:space:]]*(pub[[:space:]]+)?(unsafe[[:space:]]+)?trait[[:space:]]+([[:alnum:]_]+)/\\3/r,traits,traits/");
223225
// `mut` owns its trailing whitespace so that the ordinary single-space form
// `let x` matches; previously two separate whitespace runs were required
// even when `mut` was absent. The identifier remains capture group 2.
command.add("--regex-rust=/^[[:space:]]*let[[:space:]]+(mut[[:space:]]+)?([[:alnum:]_]+)/\\2/V,variables/");
224226

225227
command.add("--langdef=pascal");
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2016 Nikolay Denev.
23+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
24+
*/
25+
26+
/*
 * A Rust identifier: an XID_Start character followed by any number of
 * XID_Continue characters, or an underscore followed by at least one
 * XID_Continue character (a lone "_" is not an identifier).
 */
Identifier = ([\p{XID_Start}][\p{XID_Continue}]* | [_][\p{XID_Continue}]+)

HEXDIG = [0-9A-Fa-f]
DIGIT = [0-9]
OCTDIG = [0-7]
BINDIG = [01]

/*
 * Literal type suffixes, optionally separated from the digits by
 * underscores. The 128-bit integer types are included so that e.g. "1u128"
 * is consumed as one number instead of a number followed by an identifier.
 */
Integer_suffix = [_]* ("u8" | "i8" | "u16" | "i16" | "u32" | "i32" | "u64" |
    "i64" | "u128" | "i128" | "isize" | "usize")
Fp_suffix = [_]* ("f32" | "f64")

/*
 * Integer literals; underscores may separate digits.
 */
Decimal_integer = {DIGIT} ([_]*{DIGIT})* {Integer_suffix}?
Hex_integer = 0[x] {HEXDIG} ([_]*{HEXDIG})* {Integer_suffix}?
Octal_integer = 0[o] {OCTDIG} ([_]*{OCTDIG})* {Integer_suffix}?
Binary_integer = 0[b] {BINDIG} ([_]*{BINDIG})* {Integer_suffix}?

/*
 * Floating-point literals. The exponent accepts underscores around its
 * digits (e.g. "1e_10", "1E+1_0") but still requires at least one digit.
 */
Floating_point = ({DIGIT} ([_]*{DIGIT})* \. {DIGIT} ([_]*{DIGIT})* |
    {DIGIT} ([_]*{DIGIT})*) ([eE][+-]? [_]*{DIGIT} ([_]*{DIGIT})*)? {Fp_suffix}?

Number = ({Decimal_integer} | {Hex_integer} | {Octal_integer} |
    {Binary_integer} | {Floating_point})

src/org/opensolaris/opengrok/analysis/rust/RustSymbolTokenizer.lex

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@
2828
*/
2929

3030
package org.opensolaris.opengrok.analysis.rust;
31-
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
3231

32+
import java.io.IOException;
33+
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
3334
%%
3435
%public
3536
%class RustSymbolTokenizer
@@ -41,42 +42,85 @@ super(in);
4142
%int
4243
%include CommonTokenizer.lexh
4344
%char
45+
%{
46+
/**
47+
* Stores the number of hashes beginning and ending a raw string or raw byte
48+
* string. E.g., r##"blah"## has rawHashCount == 2.
49+
*/
50+
int rawHashCount;
51+
52+
int nestedComment;
4453

45-
Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
54+
@Override
55+
public void reset() throws IOException {
56+
super.reset();
57+
rawHashCount = 0;
58+
nestedComment = 0;
59+
}
60+
%}
4661

47-
%state STRING COMMENT SCOMMENT QSTRING
62+
%state STRING RSTRING COMMENT SCOMMENT
4863

64+
%include Common.lexh
65+
%include Rust.lexh
4966
%%
5067

5168
<YYINITIAL> {
52-
{Identifier} {String id = yytext();
69+
{Identifier} {
70+
String id = yytext();
5371
if(!Consts.kwd.contains(id)){
5472
setAttribs(id, yychar, yychar + yylength());
55-
return yystate(); }
56-
}
57-
\" { yybegin(STRING); }
58-
\' { yybegin(QSTRING); }
59-
"/*" { yybegin(COMMENT); }
73+
return yystate();
74+
}
75+
}
76+
{Number} {}
77+
[b]?\" { yybegin(STRING); }
78+
[b]?[r][#]*\" {
79+
yybegin(RSTRING);
80+
rawHashCount = RustUtils.countRawHashes(yytext());
81+
}
82+
[b]?\' ([^\n\r\'\\] | \\[^\n\r]) \' |
83+
[b]?\' \\[xX]{HEXDIG}{HEXDIG} \' |
84+
[b]?\' \\[uU]\{ {HEXDIG}{1,6} \}\' {}
85+
"/*" {
86+
++nestedComment;
87+
yybegin(COMMENT);
88+
}
6089
"//" { yybegin(SCOMMENT); }
6190
}
6291

6392
<STRING> {
93+
\\[\"\\] {}
6494
\" { yybegin(YYINITIAL); }
65-
\\\\ | \\\" {}
6695
}
6796

68-
<QSTRING> {
69-
\' { yybegin(YYINITIAL); }
97+
<RSTRING> {
98+
\"[#]* {
99+
String capture = yytext();
100+
if (RustUtils.isRawEnding(capture, rawHashCount)) {
101+
yybegin(YYINITIAL);
102+
int excess = capture.length() - 1 - rawHashCount;
103+
if (excess > 0) yypushback(excess);
104+
}
105+
}
106+
}
107+
108+
<STRING, RSTRING> {
109+
{WhspChar}*{EOL} {
110+
// no-op
111+
}
70112
}
71113

72114
<COMMENT> {
73-
"*/" { yybegin(YYINITIAL);}
115+
"*/" { if (--nestedComment == 0) yybegin(YYINITIAL); }
116+
"/*" { ++nestedComment; }
74117
}
75118

76119
<SCOMMENT> {
77-
\n { yybegin(YYINITIAL);}
120+
{WhiteSpace} {}
121+
{EOL} { yybegin(YYINITIAL);}
78122
}
79123

80-
<YYINITIAL, STRING, COMMENT, SCOMMENT, QSTRING> {
124+
<YYINITIAL, STRING, RSTRING, COMMENT, SCOMMENT> {
81125
[^] {}
82126
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
*/
23+
package org.opensolaris.opengrok.analysis.rust;
24+
25+
/**
26+
* Represents a container for Rust-related utility methods.
27+
*/
28+
/**
 * Static helpers for recognizing the delimiters of Rust raw strings and raw
 * byte strings (e.g. {@code r##"..."##}).
 */
public final class RustUtils {

    /**
     * Private so the class cannot be instantiated; it only holds static
     * helpers.
     */
    private RustUtils() {
    }

    /**
     * Counts the number of hashes ('#') immediately before the terminating
     * quote ('"') in {@code capture}.
     * @param capture the Rust raw- or raw-byte-string initiator (e.g.,
     * {@code "r##\""})
     * @return the number of hashes counted
     * @throws NullPointerException if {@code capture} is null
     * @throws IllegalArgumentException if {@code capture} does not end with
     * a quote
     */
    public static int countRawHashes(String capture) {
        if (capture == null) {
            throw new NullPointerException("`capture' is null");
        }
        if (!capture.endsWith("\"")) {
            throw new IllegalArgumentException("`capture' does not end in \"");
        }

        // Walk backward from the character just before the closing quote
        // for as long as hashes are seen.
        int n = 0;
        for (int i = capture.length() - 2;
                i >= 0 && capture.charAt(i) == '#'; --i) {
            ++n;
        }
        return n;
    }

    /**
     * Determines if the specified {@code capture} starts with a quote ('"')
     * and is followed by at least the specified number of hashes, indicating
     * the end of a raw- or raw-byte-string.
     * <p>
     * N.b., {@code capture} may contain more hashes than required; the
     * caller should push back the excess, i.e. the length of
     * {@code capture} minus one minus {@code rawHashCount}.
     * @param capture the possible Rust raw- or raw-byte-string ender (e.g.,
     * {@code "\"####"})
     * @param rawHashCount the number of required hashes in order to be
     * considered "raw-ending"
     * @return true if the {@code capture} is determined to be "raw-ending" or
     * false otherwise
     * @throws NullPointerException if {@code capture} is null
     * @throws IllegalArgumentException if {@code capture} does not start
     * with a quote
     */
    public static boolean isRawEnding(String capture, int rawHashCount) {
        if (capture == null) {
            throw new NullPointerException("`capture' is null");
        }
        if (!capture.startsWith("\"")) {
            throw new IllegalArgumentException(
                "`capture' does not start with \"");
        }

        // Count hashes after the quote, stopping as soon as enough have
        // been seen; any surplus is the caller's to push back.
        int n = 0;
        for (int i = 1; n < rawHashCount && i < capture.length()
                && capture.charAt(i) == '#'; ++i) {
            ++n;
        }
        return n >= rawHashCount;
    }
}

0 commit comments

Comments
 (0)