Skip to content

Commit e967aa5

Browse files
authored
Merge pull request #341 from SeeSharpSoft/fb_lexer_improvements
Lexer & Parser Improvements
2 parents 8a58565 + 05dce5d commit e967aa5

File tree

24 files changed

+424
-173
lines changed

24 files changed

+424
-173
lines changed

.github/workflows/CIBuild.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ jobs:
2424
- name: Build with Gradle
2525
env:
2626
IDEA_SOURCES: false
27-
run: xvfb-run ./gradlew build
27+
run: xvfb-run ./gradlew test

.github/workflows/CronEAP.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ jobs:
2626
IDEA_VERSION: LATEST-EAP-SNAPSHOT
2727
GRAMMAR_KIT_VERSION: 2021.1.2
2828
IDEA_SOURCES: false
29-
run: xvfb-run ./gradlew build
29+
run: xvfb-run ./gradlew test

.github/workflows/PullRequest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,4 @@ jobs:
3535
IDEA_VERSION: ${{ matrix.ideaVersion }}
3636
GRAMMAR_KIT_VERSION: ${{ matrix.gkVersion }}
3737
IDEA_SOURCES: false
38-
run: xvfb-run ./gradlew build
38+
run: xvfb-run ./gradlew test

CHANGELOG

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
2.19.1
2-
Oct 21, 2022
1+
2.20.0
2+
Oct 24, 2022
33

44
FIX: Cannot load from object array because "data" is null #335 #337
5+
NEW: Support fast lexing for default comments
6+
NEW: Simplify & unify both lexers
7+
FIX: Empty comment indicator
58

69
2.19.0
710
Jul 24, 2022

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
[![EAP Status](https://github.com/SeeSharpSoft/intellij-csv-validator/actions/workflows/CronEAP.yml/badge.svg)](https://github.com/SeeSharpSoft/intellij-csv-validator/actions)
44
[![Coverage Status](https://coveralls.io/repos/github/SeeSharpSoft/intellij-csv-validator/badge.svg?branch=master)](https://coveralls.io/github/SeeSharpSoft/intellij-csv-validator?branch=master)
55
[![Known Vulnerabilities](https://snyk.io/test/github/SeeSharpSoft/intellij-csv-validator/badge.svg?targetFile=build.gradle)](https://snyk.io/test/github/SeeSharpSoft/intellij-csv-validator?targetFile=build.gradle)
6-
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/97769359388e44bfb7101346d510fccf)](https://www.codacy.com/app/github_124/intellij-csv-validator?utm_source=github.com&utm_medium=referral&utm_content=SeeSharpSoft/intellij-csv-validator&utm_campaign=Badge_Grade)
6+
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/97769359388e44bfb7101346d510fccf)](https://www.codacy.com/gh/SeeSharpSoft/intellij-csv-validator/dashboard?utm_source=github.com&utm_medium=referral&utm_content=SeeSharpSoft/intellij-csv-validator&utm_campaign=Badge_Grade)
77
[![BCH compliance](https://bettercodehub.com/edge/badge/SeeSharpSoft/intellij-csv-validator?branch=master)](https://bettercodehub.com/results/SeeSharpSoft/intellij-csv-validator/)
88

99
# Lightweight CSV Plugin for JetBrains IDE family

build.gradle

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,3 @@
1-
buildscript {
2-
repositories {
3-
mavenCentral()
4-
maven { url 'https://jitpack.io' }
5-
}
6-
}
7-
81
plugins {
92
// https://github.com/JetBrains/gradle-intellij-plugin
103
id 'org.jetbrains.intellij' version '1.9.0'
@@ -16,7 +9,7 @@ plugins {
169
}
1710

1811
jacoco {
19-
toolVersion = "0.8.6"
12+
toolVersion = "0.8.8"
2013
}
2114

2215
jacocoTestReport {
@@ -26,7 +19,7 @@ jacocoTestReport {
2619
}
2720

2821
group 'net.seesharpsoft.intellij.plugins'
29-
version '2.19.1'
22+
version '2.20.0'
3023

3124
apply plugin: 'java'
3225
project.sourceCompatibility = JavaVersion.VERSION_11
@@ -41,7 +34,7 @@ repositories {
4134
dependencies {
4235
implementation 'net.seesharpsoft.sharping:sharping-commons:0.21.0'
4336
compileOnly 'org.apache.ant:ant:1.10.12'
44-
testImplementation 'org.mockito:mockito-core:4.8.0'
37+
testImplementation 'org.mockito:mockito-core:4.8.1'
4538
}
4639
sourceSets {
4740
main {
@@ -85,6 +78,9 @@ intellij {
8578
patchPluginXml {
8679
changeNotes = """<pre style="font-family: sans-serif">
8780
FIX: Cannot load from object array because "data" is null #335 #337
81+
NEW: Support fast lexing for default comments
82+
NEW: Simplify & unify both lexers
83+
FIX: Empty comment indicator
8884
</pre>"""
8985
}
9086
publishPlugin {
@@ -129,3 +125,10 @@ tasks.named("generateLexer").configure {
129125
compileJava {
130126
dependsOn generateLexer
131127
}
128+
129+
// TODO https://youtrack.jetbrains.com/issue/IDEA-298989 - remove once gradle plugin v1.9.1 or v1.10.0 is released
130+
setupDependencies {
131+
doLast {
132+
fileTree("$buildDir/instrumented/instrumentCode") { include("**/*TableEditorSwing.class") }.files.forEach { delete(it) }
133+
}
134+
}

src/main/java/net/seesharpsoft/intellij/plugins/csv/Csv.bnf

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,12 @@
1313
tokenTypeClass="net.seesharpsoft.intellij.plugins.csv.psi.CsvTokenType"
1414

1515
tokens=[
16-
TEXT='regexp:[^ ,:;|\t\r\n"\\]+'
17-
ESCAPED_TEXT='regexp:[,:;|\t\r\n]|""|\\"'
18-
ESCAPE_CHARACTER='regexp:\\'
19-
COMMA='regexp:[,:;|\t]'
20-
QUOTE='regexp:"'
21-
CRLF='regexp:\n'
22-
COMMENT='regexp:#.*(\n|$)'
16+
TEXT
17+
ESCAPED_TEXT
18+
COMMA
19+
QUOTE
20+
CRLF
21+
COMMENT
2322
]
2423
}
2524

@@ -29,6 +28,8 @@ record ::= (COMMENT | (field (COMMA field)*))
2928

3029
field ::= (escaped | nonEscaped)
3130

32-
private escaped ::= QUOTE (TEXT | ESCAPE_CHARACTER | ESCAPED_TEXT)* QUOTE
31+
private escaped ::= QUOTE quotedText QUOTE
3332

34-
private nonEscaped ::= (TEXT | ESCAPE_CHARACTER)*
33+
private quotedText ::= (TEXT | ESCAPED_TEXT)*
34+
35+
private nonEscaped ::= TEXT*

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexer.flex

Lines changed: 69 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@ package net.seesharpsoft.intellij.plugins.csv;
33
import com.intellij.psi.tree.IElementType;
44
import net.seesharpsoft.intellij.plugins.csv.psi.CsvTypes;
55
import com.intellij.psi.TokenType;
6-
import com.intellij.lexer.FlexLexer;import org.intellij.grammar.livePreview.LivePreviewElementType;
7-
8-
import java.util.regex.Pattern;
6+
import com.intellij.lexer.FlexLexer;
97

108
%%
119

@@ -17,120 +15,125 @@ import java.util.regex.Pattern;
1715
%{
1816
private CsvValueSeparator myValueSeparator;
1917
private CsvEscapeCharacter myEscapeCharacter;
18+
private boolean mySupportComments;
2019

21-
private static final Pattern ESCAPE_TEXT_PATTERN = Pattern.compile("[,:;|\\t\\r\\n]");
20+
private boolean isActualValueSeparator() {
21+
return myValueSeparator.isValueSeparator(yytext().toString());
22+
}
2223

2324
/**
24-
* Provide constructor that supports a Project as parameter.
25+
* Provide a constructor that supports parameters to customize the lexer.
2526
*/
26-
CsvLexer(java.io.Reader in, CsvValueSeparator valueSeparator, CsvEscapeCharacter escapeCharacter) {
27+
CsvLexer(java.io.Reader in, CsvValueSeparator valueSeparator, CsvEscapeCharacter escapeCharacter, boolean supportComments) {
2728
this(in);
2829
myValueSeparator = valueSeparator;
2930
myEscapeCharacter = escapeCharacter;
31+
mySupportComments = supportComments;
3032
}
3133
%}
3234
%eof{ return;
3335
%eof}
3436

35-
TEXT=[^ ,:;|\t\r\n\"\\]+
36-
ESCAPED_TEXT=[,:;|\t\r\n]|\"\"|\\\"
37-
ESCAPE_CHAR=\\
38-
QUOTE=\"
39-
COMMA=[,:;|\t]
40-
EOL=\n
4137
WHITE_SPACE=[ \f]+
38+
VALUE_SEPARATOR=[,:;|\t]
39+
RECORD_SEPARATOR=\n
40+
ESCAPED_QUOTE=\"\"|\\\"
41+
QUOTE=\"
42+
TEXT=[^ ,:;|\t\r\n\"\\]+
43+
BACKSLASH=\\+
44+
COMMENT=\#[^\n]*
4245

43-
%state AFTER_TEXT
44-
%state ESCAPED_TEXT
45-
%state UNESCAPED_TEXT
46-
%state ESCAPING
46+
%state UNQUOTED
47+
%state QUOTED
4748

4849
%%
4950

50-
<YYINITIAL> {QUOTE}
51+
<YYINITIAL, UNQUOTED> {TEXT}
5152
{
52-
yybegin(ESCAPED_TEXT);
53-
return CsvTypes.QUOTE;
53+
yybegin(UNQUOTED);
54+
return CsvTypes.TEXT;
5455
}
5556

56-
<ESCAPED_TEXT> {QUOTE}
57+
<YYINITIAL, UNQUOTED> {BACKSLASH}
5758
{
58-
yybegin(AFTER_TEXT);
59-
return CsvTypes.QUOTE;
59+
yybegin(UNQUOTED);
60+
return CsvTypes.TEXT;
6061
}
6162

62-
<YYINITIAL> {TEXT}
63+
<YYINITIAL, UNQUOTED> {VALUE_SEPARATOR}
6364
{
64-
yybegin(UNESCAPED_TEXT);
65+
yybegin(UNQUOTED);
66+
if (isActualValueSeparator()) {
67+
return CsvTypes.COMMA;
68+
}
6569
return CsvTypes.TEXT;
6670
}
6771

68-
<UNESCAPED_TEXT, ESCAPED_TEXT> {TEXT}
72+
<YYINITIAL, UNQUOTED> {QUOTE}
6973
{
70-
return CsvTypes.TEXT;
74+
yybegin(QUOTED);
75+
return CsvTypes.QUOTE;
7176
}
7277

73-
<YYINITIAL, UNESCAPED_TEXT> {ESCAPE_CHAR}
78+
<YYINITIAL, UNQUOTED> {RECORD_SEPARATOR}
7479
{
75-
String text = yytext().toString();
76-
if (myEscapeCharacter.getCharacter().equals(text)) {
77-
return TokenType.BAD_CHARACTER;
80+
yybegin(YYINITIAL);
81+
return CsvTypes.CRLF;
82+
}
83+
84+
<YYINITIAL> {COMMENT}
85+
{
86+
if (mySupportComments) {
87+
return CsvTypes.COMMENT;
7888
}
79-
yybegin(UNESCAPED_TEXT);
89+
yypushback(yylength() - 1);
90+
yybegin(UNQUOTED);
8091
return CsvTypes.TEXT;
8192
}
8293

83-
<ESCAPED_TEXT, ESCAPING> {ESCAPE_CHAR} {
84-
String text = yytext().toString();
85-
if (myEscapeCharacter.getCharacter().equals(text)) {
86-
switch (yystate()) {
87-
case ESCAPED_TEXT:
88-
yybegin(ESCAPING);
89-
break;
90-
case ESCAPING:
91-
yybegin(ESCAPED_TEXT);
92-
break;
93-
default:
94-
throw new RuntimeException("unhandled state: " + yystate());
94+
<QUOTED> {TEXT}
95+
{
96+
return CsvTypes.TEXT;
97+
}
98+
99+
<QUOTED> {BACKSLASH}
100+
{
101+
if (myEscapeCharacter == CsvEscapeCharacter.BACKSLASH) {
102+
int backslashCount = yylength();
103+
if (backslashCount > 1 && (backslashCount % 2 != 0)) {
104+
yypushback(1);
95105
}
96-
return CsvTypes.ESCAPED_TEXT;
97106
}
98107
return CsvTypes.TEXT;
99108
}
100109

101-
<ESCAPED_TEXT> {ESCAPED_TEXT}
110+
<QUOTED> {RECORD_SEPARATOR}
102111
{
103-
String text = yytext().toString();
104-
if (myEscapeCharacter.isEscapedQuote(text)
105-
|| ESCAPE_TEXT_PATTERN.matcher(text).matches()
106-
) {
112+
return CsvTypes.ESCAPED_TEXT;
113+
}
114+
115+
<QUOTED> {VALUE_SEPARATOR}
116+
{
117+
if (isActualValueSeparator()) {
107118
return CsvTypes.ESCAPED_TEXT;
108119
}
109-
if (!text.startsWith(CsvEscapeCharacter.QUOTE.getCharacter())) {
110-
yypushback(1);
111-
return CsvTypes.TEXT;
112-
}
113-
114-
return TokenType.BAD_CHARACTER;
120+
return CsvTypes.TEXT;
115121
}
116122

117-
<YYINITIAL, AFTER_TEXT, UNESCAPED_TEXT> {COMMA}
123+
<QUOTED> {ESCAPED_QUOTE}
118124
{
119-
if (myValueSeparator.isValueSeparator(yytext().toString())) {
120-
yybegin(YYINITIAL);
121-
return CsvTypes.COMMA;
122-
}
123-
if (yystate() != AFTER_TEXT) {
124-
yybegin(UNESCAPED_TEXT);
125+
String text = yytext().toString();
126+
if (!myEscapeCharacter.isEscapedQuote(text)) {
127+
yypushback(1);
125128
return CsvTypes.TEXT;
126129
}
127-
return TokenType.BAD_CHARACTER;
130+
return CsvTypes.ESCAPED_TEXT;
128131
}
129132

130-
<YYINITIAL, AFTER_TEXT, UNESCAPED_TEXT> {EOL}
133+
<QUOTED> {QUOTE}
131134
{
132-
yybegin(YYINITIAL);
133-
return CsvTypes.CRLF;
135+
yybegin(UNQUOTED);
136+
return CsvTypes.QUOTE;
134137
}
135138

136139
{WHITE_SPACE}

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexerAdapter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import com.intellij.lexer.FlexAdapter;
44

55
public class CsvLexerAdapter extends FlexAdapter {
6-
public CsvLexerAdapter(CsvValueSeparator separator, CsvEscapeCharacter escapeCharacter) {
7-
super(new CsvLexer(null, separator, escapeCharacter));
6+
public CsvLexerAdapter(CsvValueSeparator separator, CsvEscapeCharacter escapeCharacter, boolean supportComments) {
7+
super(new CsvLexer(null, separator, escapeCharacter, supportComments));
88
}
99
}

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexerFactory.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings;
88
import org.jetbrains.annotations.NotNull;
99

10+
import static net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings.COMMENT_INDICATOR_DEFAULT;
11+
1012
public class CsvLexerFactory {
1113
protected static CsvLexerFactory INSTANCE = new CsvLexerFactory();
1214

@@ -15,15 +17,18 @@ public static CsvLexerFactory getInstance() {
1517
}
1618

1719
protected Lexer createLexer(@NotNull CsvValueSeparator separator, @NotNull CsvEscapeCharacter escapeCharacter) {
18-
if (separator.requiresCustomLexer() || !CsvEditorSettings.getInstance().getCommentIndicator().isEmpty()) {
20+
final String commentIndicator = CsvEditorSettings.getInstance().getCommentIndicator();
21+
if (separator.requiresCustomLexer() ||
22+
escapeCharacter.isCustom() ||
23+
(!commentIndicator.isEmpty() && !commentIndicator.equals(COMMENT_INDICATOR_DEFAULT))) {
1924
return new CsvSharpLexer(new CsvSharpLexer.Configuration(
2025
separator.getCharacter(),
2126
"\n",
2227
escapeCharacter.getCharacter(),
2328
"\"",
24-
CsvEditorSettings.getInstance().getCommentIndicator()));
29+
commentIndicator));
2530
}
26-
return new CsvLexerAdapter(separator, escapeCharacter);
31+
return new CsvLexerAdapter(separator, escapeCharacter, !commentIndicator.isEmpty());
2732
}
2833

2934
public Lexer createLexer(Project project, VirtualFile file) {

0 commit comments

Comments
 (0)