Skip to content

Commit 9c8cd69

Browse files
authored
Merge pull request #187 from SeeSharpSoft/fb_backslash_lexing
[FIX] Backslash in text is considered a special character #184
2 parents 3ee2691 + 693fbba commit 9c8cd69

File tree

16 files changed

+135
-29
lines changed

16 files changed

+135
-29
lines changed

src/main/java/net/seesharpsoft/intellij/plugins/csv/Csv.bnf

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
tokens=[
1616
TEXT='regexp:[^ ,;|\t\r\n"\\]+'
17-
ESCAPED_TEXT='regexp:[,;|\t\r\n\\]|""|\\"'
17+
ESCAPED_TEXT='regexp:[,;|\t\r\n]|""|\\"'
18+
ESCAPE_CHARACTER='regexp:\\'
1819
COMMA='regexp:[,;|\t]'
1920
QUOTE='"'
2021
CRLF='regexp:\n'
@@ -27,6 +28,6 @@ record ::= field (COMMA field)*
2728

2829
field ::= (escaped | nonEscaped)
2930

30-
private escaped ::= QUOTE (TEXT | ESCAPED_TEXT)* QUOTE
31+
private escaped ::= QUOTE (TEXT | ESCAPE_CHARACTER | ESCAPED_TEXT)* QUOTE
3132

32-
private nonEscaped ::= TEXT*
33+
private nonEscaped ::= (TEXT | ESCAPE_CHARACTER)*

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvEscapeCharacter.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,18 @@
33
import java.util.regex.Pattern;
44

55
public enum CsvEscapeCharacter {
6-
QUOTE("\"", "Double Quote (\")"),
7-
BACKSLASH("\\", "Backslash (\\)");
6+
QUOTE("\"", "Double Quote (\")", "\""),
7+
BACKSLASH("\\", "Backslash (\\)", "\\\\");
88

99
private final String myCharacter;
1010
private final String myDisplay;
1111
private final Pattern myPattern;
12+
private final String myRegexPattern;
1213

13-
CsvEscapeCharacter(String character, String display) {
14+
CsvEscapeCharacter(String character, String display, String regexPattern) {
1415
myCharacter = character;
1516
myDisplay = display;
17+
myRegexPattern = regexPattern;
1618
myPattern = Pattern.compile(Pattern.quote(myCharacter + "\""));
1719
}
1820

@@ -24,6 +26,10 @@ public String getDisplay() {
2426
return myDisplay;
2527
}
2628

29+
public String getRegexPattern() {
30+
return myRegexPattern;
31+
}
32+
2733
public boolean isEscapedQuote(String text) {
2834
return myPattern.matcher(text).matches();
2935
}

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvHelper.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.util.HashMap;
2828
import java.util.Map;
2929
import java.util.function.Function;
30-
import java.util.regex.Pattern;
3130

3231
public final class CsvHelper {
3332

@@ -221,13 +220,18 @@ public static String unquoteCsvValue(String content, CsvEscapeCharacter escapeCh
221220
if (result.length() > 1 && result.startsWith("\"") && result.endsWith("\"")) {
222221
result = result.substring(1, result.length() - 1);
223222
}
224-
result = result.replaceAll("(?:" + Pattern.quote(escapeCharacter.getCharacter()) + ")\"", "\"");
223+
if (escapeCharacter != CsvEscapeCharacter.QUOTE) {
224+
result = result.replaceAll("(?:" + escapeCharacter.getRegexPattern() + ")" +
225+
escapeCharacter.getRegexPattern(), escapeCharacter.getRegexPattern());
226+
}
227+
result = result.replaceAll("(?:" + escapeCharacter.getRegexPattern() + ")\"", "\"");
225228
return result;
226229
}
227230

228-
private static boolean isQuotingRequired(String content, CsvValueSeparator valueSeparator) {
231+
private static boolean isQuotingRequired(String content, CsvEscapeCharacter escapeCharacter, CsvValueSeparator valueSeparator) {
229232
return content != null &&
230-
(content.contains(valueSeparator.getCharacter()) || content.contains("\"") || content.contains("\n") || content.startsWith(" ") || content.endsWith(" "));
233+
(content.contains(valueSeparator.getCharacter()) || content.contains("\"") || content.contains("\n") || content.contains(escapeCharacter.getCharacter()) ||
234+
content.startsWith(" ") || content.endsWith(" "));
231235
}
232236

233237
public static String quoteCsvField(String content,
@@ -237,8 +241,13 @@ public static String quoteCsvField(String content,
237241
if (content == null) {
238242
return "";
239243
}
240-
if (quotingEnforced || isQuotingRequired(content, valueSeparator)) {
241-
String result = content.replaceAll("\"", escapeCharacter.getCharacter() + "\"");
244+
if (quotingEnforced || isQuotingRequired(content, escapeCharacter, valueSeparator)) {
245+
String result = content;
246+
if (escapeCharacter != CsvEscapeCharacter.QUOTE) {
247+
result = result.replaceAll(escapeCharacter.getRegexPattern(),
248+
escapeCharacter.getRegexPattern() + escapeCharacter.getRegexPattern());
249+
}
250+
result = result.replaceAll("\"", escapeCharacter.getRegexPattern() + "\"");
242251
return "\"" + result + "\"";
243252
}
244253
return content;

src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexer.flex

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package net.seesharpsoft.intellij.plugins.csv;
33
import com.intellij.psi.tree.IElementType;
44
import net.seesharpsoft.intellij.plugins.csv.psi.CsvTypes;
55
import com.intellij.psi.TokenType;
6-
import com.intellij.lexer.FlexLexer;
6+
import com.intellij.lexer.FlexLexer;import org.intellij.grammar.livePreview.LivePreviewElementType;
77

88
import java.util.regex.Pattern;
99

@@ -33,7 +33,8 @@ import java.util.regex.Pattern;
3333
%eof}
3434

3535
TEXT=[^ ,;|\t\r\n\"\\]+
36-
ESCAPED_TEXT=[,;|\t\r\n\\]|\"\"|\\\"
36+
ESCAPED_TEXT=[,;|\t\r\n]|\"\"|\\\"
37+
ESCAPE_CHAR=\\
3738
QUOTE=\"
3839
COMMA=[,;|\t]
3940
EOL=\n
@@ -42,6 +43,7 @@ WHITE_SPACE=[ \f]+
4243
%state AFTER_TEXT
4344
%state ESCAPED_TEXT
4445
%state UNESCAPED_TEXT
46+
%state ESCAPING
4547

4648
%%
4749

@@ -68,6 +70,33 @@ WHITE_SPACE=[ \f]+
6870
return CsvTypes.TEXT;
6971
}
7072

73+
<YYINITIAL, UNESCAPED_TEXT> {ESCAPE_CHAR}
74+
{
75+
String text = yytext().toString();
76+
if (myEscapeCharacter.getCharacter().equals(text)) {
77+
return TokenType.BAD_CHARACTER;
78+
}
79+
yybegin(UNESCAPED_TEXT);
80+
return CsvTypes.TEXT;
81+
}
82+
83+
<ESCAPED_TEXT, ESCAPING> {ESCAPE_CHAR} {
84+
String text = yytext().toString();
85+
if (myEscapeCharacter.getCharacter().equals(text)) {
86+
switch (yystate()) {
87+
case ESCAPED_TEXT:
88+
yybegin(ESCAPING);
89+
break;
90+
case ESCAPING:
91+
yybegin(ESCAPED_TEXT);
92+
break;
93+
default:
94+
throw new RuntimeException("unhandled state: " + yystate());
95+
}
96+
}
97+
return CsvTypes.TEXT;
98+
}
99+
71100
<ESCAPED_TEXT> {ESCAPED_TEXT}
72101
{
73102
String text = yytext().toString();
@@ -76,6 +105,11 @@ WHITE_SPACE=[ \f]+
76105
) {
77106
return CsvTypes.ESCAPED_TEXT;
78107
}
108+
if (!text.startsWith(CsvEscapeCharacter.QUOTE.getCharacter())) {
109+
yypushback(1);
110+
return CsvTypes.TEXT;
111+
}
112+
79113
return TokenType.BAD_CHARACTER;
80114
}
81115

src/test/java/net/seesharpsoft/intellij/plugins/csv/CsvHelperTest.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package net.seesharpsoft.intellij.plugins.csv;
2+
3+
import com.intellij.testFramework.PlatformLiteFixture;
4+
5+
public class CsvHelperTest extends PlatformLiteFixture {
6+
7+
public void testUnquoteCsvValue() {
8+
String csv = "\"Header\"\" \\\\1\\\\\"";
9+
assertEquals("Header\" \\\\1\\\\", CsvHelper.unquoteCsvValue(csv, CsvEscapeCharacter.QUOTE));
10+
}
11+
12+
public void testUnquoteCsvValueWithBackslash() {
13+
String csv = "\"Header\\\" \\\\1\\\\\"";
14+
assertEquals("Header\" \\1\\", CsvHelper.unquoteCsvValue(csv, CsvEscapeCharacter.BACKSLASH));
15+
}
16+
17+
public void testQuoteCsvValue() {
18+
String csv = "Header\" \\1\\";
19+
assertEquals( "\"Header\"\" \\1\\\"", CsvHelper.quoteCsvField(csv, CsvEscapeCharacter.QUOTE, CsvValueSeparator.COMMA, false));
20+
}
21+
22+
public void testQuoteCsvValueWithBackslash() {
23+
String csv = "Header\" \\1\\";
24+
assertEquals("\"Header\\\" \\\\1\\\\\"", CsvHelper.quoteCsvField(csv, CsvEscapeCharacter.BACKSLASH, CsvValueSeparator.COMMA, false));
25+
}
26+
}

src/test/java/net/seesharpsoft/intellij/plugins/csv/intention/CsvIntentionTest.java

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,37 @@ protected String getTestDataPath() {
1212
return "./src/test/resources/intention";
1313
}
1414

15+
@Override
16+
protected void tearDown() throws Exception {
17+
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEditorSettings.ESCAPE_CHARACTER_DEFAULT);
18+
super.tearDown();
19+
}
20+
1521
protected void doTestIntention(String testName, String hint) throws Throwable {
22+
doTestIntention(testName, hint, false);
23+
}
24+
25+
protected void doTestIntention(String testName, String hint, boolean expectError) throws Throwable {
1626
myFixture.configureByFile(testName + "/before.csv");
1727
final IntentionAction action = myFixture.filterAvailableIntentions(hint).stream()
1828
.filter(intentionAction -> intentionAction.getText().equals(hint))
19-
.findFirst().get();
20-
myFixture.launchAction(action);
21-
myFixture.checkResultByFile(testName + "/after.csv");
29+
.findFirst().orElse(null);
30+
if (action == null) {
31+
assertTrue("action not found -> this was expected: " + expectError, expectError);
32+
} else {
33+
assertFalse("action was found -> this was expected: " + !expectError, expectError);
34+
myFixture.launchAction(action);
35+
myFixture.checkResultByFile(testName + "/after.csv");
36+
}
37+
}
38+
39+
public void testErroneousCsv() throws Throwable {
40+
doTestIntention("Erroneous", "Quote All", true);
41+
}
42+
43+
public void testErroneousBackslashCsv() throws Throwable {
44+
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEscapeCharacter.BACKSLASH);
45+
doTestIntention("ErroneousBackslash", "Quote All", true);
2246
}
2347

2448
public void testQuoteAllIntention() throws Throwable {
@@ -28,7 +52,6 @@ public void testQuoteAllIntention() throws Throwable {
2852
public void testQuoteAllBackslashIntention() throws Throwable {
2953
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEscapeCharacter.BACKSLASH);
3054
doTestIntention("QuoteAllBackslash", "Quote All");
31-
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEditorSettings.ESCAPE_CHARACTER_DEFAULT);
3255
}
3356

3457
public void testUnquoteAllIntention() throws Throwable {
@@ -38,7 +61,6 @@ public void testUnquoteAllIntention() throws Throwable {
3861
public void testUnquoteAllBackslashIntention() throws Throwable {
3962
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEscapeCharacter.BACKSLASH);
4063
doTestIntention("UnquoteAllBackslash", "Unquote All");
41-
CsvEditorSettings.getInstance().setDefaultEscapeCharacter(CsvEditorSettings.ESCAPE_CHARACTER_DEFAULT);
4264
}
4365

4466
public void testQuoteIntention() throws Throwable {
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"Value ""1"", "Value 2"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Header \\1\
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
"Header 1", "Header, 2"
1+
"\Header \\1\", "Header, 2"
22
"Value ""1"""," Value 2"
3-
""
3+
"back\\""\""\slash\"
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
Header 1, "Header, 2"
1+
\Header \\1\, "Header, 2"
22
"Value ""1""", Value 2
3+
"back\\""\""\slash\"

0 commit comments

Comments
 (0)