[FEATURE] support for comments

SeeSharpSoft · SeeSharpSoft · commit b847b35a37de · 2020-07-18T16:57:42.000+02:00
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/Csv.bnf b/src/main/java/net/seesharpsoft/intellij/plugins/csv/Csv.bnf
@@ -19,12 +19,13 @@
     COMMA='regexp:[,:;|\t]'
     QUOTE='regexp:"'
     CRLF='regexp:\n'
+    COMMENT='regexp:#.*(\n|$)'
    ]
 }
 
 csvFile ::= record (CRLF record)* [CRLF]
 
-record ::= field (COMMA field)*
+record ::= (COMMENT | (field (COMMA field)*))
 
 field ::= (escaped | nonEscaped)
 
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvColumnInfoMap.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvColumnInfoMap.java
@@ -10,16 +10,18 @@ public class CsvColumnInfoMap<T> {
     private final Map<T, CsvColumnInfo<T>> myReverseInfoColumnMap;
 
     private boolean hasErrors = false;
+    private boolean hasComments = false;
 
+    /**
+     * Creates the column info map and stores the error and comment flags.
+     *
+     * @param infoColumnMap  column infos, kept by reference in {@code myInfoColumnMap}
+     * @param hasErrorsArg   initial value handed to {@link #setHasErrors(boolean)}
+     * @param hasCommentsArg initial value handed to {@link #setHasComments(boolean)}
+     */
-    public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap, boolean hasErrorsArg) {
+    public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap, boolean hasErrorsArg, boolean hasCommentsArg) {
         this.myInfoColumnMap = infoColumnMap;
         this.myReverseInfoColumnMap = new HashMap<>();
         buildReverseMap();
         setHasErrors(hasErrorsArg);
+        setHasComments(hasCommentsArg);
     }
 
+    /** Convenience constructor: delegates with both the error flag and the comment flag set to {@code false}. */
     public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap) {
-        this(infoColumnMap, false);
+        this(infoColumnMap, false, false);
     }
 
     private void buildReverseMap() {
@@ -55,6 +57,14 @@ public void setHasErrors(boolean hasErrorsArg) {
         hasErrors = hasErrorsArg;
     }
 
+    /**
+     * @return the comment flag as last stored via {@link #setHasComments(boolean)}
+     */
+    public boolean hasComments() {
+        return this.hasComments;
+    }
+
+    /**
+     * Stores the comment flag.
+     *
+     * @param hasCommentsArg new value of the flag returned by {@link #hasComments()}
+     */
+    public void setHasComments(boolean hasCommentsArg) {
+        this.hasComments = hasCommentsArg;
+    }
+
     public boolean hasEmptyLastLine() {
         CsvColumnInfo<T> columnInfo = myInfoColumnMap.get(0);
         int size = columnInfo.getSize();
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvHelper.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvHelper.java
@@ -201,6 +201,7 @@ public static CsvColumnInfoMap<PsiElement> createColumnInfoMap(CsvFile csvFile)
         Map<Integer, CsvColumnInfo<PsiElement>> columnInfoMap = new HashMap<>();
         CsvRecord[] records = PsiTreeUtil.getChildrenOfType(csvFile, CsvRecord.class);
         int row = 0;
+        boolean hasComments = false;
         for (CsvRecord record : records) {
             int column = 0;
             for (CsvField field : record.getFieldList()) {
@@ -213,9 +214,12 @@ public static CsvColumnInfoMap<PsiElement> createColumnInfoMap(CsvFile csvFile)
                 columnInfoMap.get(column).addElement(field, row, getFieldStartOffset(field), getFieldEndOffset(field));
                 ++column;
             }
+            if (record.getComment() != null) {
+                hasComments = true;
+            }
             ++row;
         }
-        return new CsvColumnInfoMap(columnInfoMap, PsiTreeUtil.hasErrorElements(csvFile));
+        return new CsvColumnInfoMap(columnInfoMap, PsiTreeUtil.hasErrorElements(csvFile), hasComments);
     }
 
     public static String unquoteCsvValue(String content, CsvEscapeCharacter escapeCharacter) {
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexerFactory.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvLexerFactory.java
@@ -4,6 +4,7 @@
 import com.intellij.openapi.project.Project;
 import com.intellij.openapi.vfs.VirtualFile;
 import com.intellij.psi.PsiFile;
+import net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings;
 import org.jetbrains.annotations.NotNull;
 
 public class CsvLexerFactory {
@@ -14,12 +15,13 @@ public static CsvLexerFactory getInstance() {
     }
 
     protected Lexer createLexer(@NotNull CsvValueSeparator separator, @NotNull CsvEscapeCharacter escapeCharacter) {
-        if (separator.isCustom()) {
+        if (separator.isCustom() || !CsvEditorSettings.getInstance().getCommentIndicator().isEmpty()) {
             return new CsvSharpLexer(new CsvSharpLexer.Configuration(
                     separator.getCharacter(),
                     "\n",
                     escapeCharacter.getCharacter(),
-                    "\""));
+                    "\"",
+                    CsvEditorSettings.getInstance().getCommentIndicator()));
         }
         return new CsvLexerAdapter(separator, escapeCharacter);
     }
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvSharpLexer.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/CsvSharpLexer.java
@@ -5,6 +5,7 @@
 import net.seesharpsoft.UnhandledSwitchCaseException;
 import net.seesharpsoft.commons.util.Tokenizer;
 import net.seesharpsoft.intellij.plugins.csv.psi.CsvTypes;
+import net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 
@@ -18,6 +19,7 @@
 public class CsvSharpLexer extends LexerBase {
 
     private final Tokenizer<TokenType> tokenizer;
+    private final List<Tokenizer.Token<TokenType>> initialNextStateTokens;
     private final List<Tokenizer.Token<TokenType>> unquotedNextStateTokens;
     private final List<Tokenizer.Token<TokenType>> quotedNextStateTokens;
 
@@ -29,23 +31,34 @@ public class CsvSharpLexer extends LexerBase {
     private IElementType currentTokenType;
     private boolean failed;
 
+    private static final Map<TokenType, LexerState> INITIAL_NEXT_STATES = new HashMap<>();
     private static final Map<TokenType, LexerState> UNQUOTED_NEXT_STATES = new HashMap<>();
     private static final Map<TokenType, LexerState> QUOTED_NEXT_STATES = new HashMap<>();
 
     static {
+        // Initial: entered at a RECORD_SEPARATOR (from Initial or Unquoted) and kept across
+        // WHITESPACE and COMMENT. It is the only state whose table accepts a COMMENT token.
+        INITIAL_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Initial);
+        INITIAL_NEXT_STATES.put(TokenType.TEXT, LexerState.Unquoted);
+        INITIAL_NEXT_STATES.put(TokenType.VALUE_SEPARATOR, LexerState.Unquoted);
+        INITIAL_NEXT_STATES.put(TokenType.BEGIN_QUOTE, LexerState.Quoted);
+        INITIAL_NEXT_STATES.put(TokenType.RECORD_SEPARATOR, LexerState.Initial);
+        INITIAL_NEXT_STATES.put(TokenType.COMMENT, LexerState.Initial);
+
+        // Unquoted: inside a record. A COMMENT_CHARACTER here does not change the state
+        // (locateToken maps it to CsvTypes.TEXT); a RECORD_SEPARATOR returns to Initial.
         UNQUOTED_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Unquoted);
         UNQUOTED_NEXT_STATES.put(TokenType.TEXT, LexerState.Unquoted);
+        UNQUOTED_NEXT_STATES.put(TokenType.COMMENT_CHARACTER, LexerState.Unquoted);
         UNQUOTED_NEXT_STATES.put(TokenType.VALUE_SEPARATOR, LexerState.Unquoted);
-        UNQUOTED_NEXT_STATES.put(TokenType.RECORD_SEPARATOR, LexerState.Unquoted);
         UNQUOTED_NEXT_STATES.put(TokenType.BEGIN_QUOTE, LexerState.Quoted);
+        UNQUOTED_NEXT_STATES.put(TokenType.RECORD_SEPARATOR, LexerState.Initial);
 
+        // Quoted: inside a quoted field; only END_QUOTE leaves this state.
         QUOTED_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Quoted);
         QUOTED_NEXT_STATES.put(TokenType.TEXT, LexerState.Quoted);
+        QUOTED_NEXT_STATES.put(TokenType.COMMENT_CHARACTER, LexerState.Quoted);
         QUOTED_NEXT_STATES.put(TokenType.ESCAPED_CHARACTER, LexerState.Quoted);
         QUOTED_NEXT_STATES.put(TokenType.END_QUOTE, LexerState.Unquoted);
     }
 
     enum LexerState {
+        Initial(INITIAL_NEXT_STATES),
         Unquoted(UNQUOTED_NEXT_STATES),
         Quoted(QUOTED_NEXT_STATES);
 
@@ -71,22 +84,26 @@ enum TokenType {
         ESCAPED_CHARACTER,
         VALUE_SEPARATOR,
         RECORD_SEPARATOR,
-        WHITESPACE
+        WHITESPACE,
+        COMMENT,
+        COMMENT_CHARACTER
     }
 
     public static class Configuration {
-        public static final Configuration DEFAULT = new Configuration(",", "\n", "\"", "\"");
+        public static final Configuration DEFAULT = new Configuration(",", "\n", "\"", "\"", "#");
 
         public String valueSeparator;
         public String recordSeparator;
         public String escapeCharacter;
         public String quoteCharacter;
+        public String commentCharacter;
 
-        public Configuration(String valueSeparator, String recordSeparator, String escapeCharacter, String quoteCharacter) {
+        public Configuration(String valueSeparator, String recordSeparator, String escapeCharacter, String quoteCharacter, String commentCharacter) {
             this.valueSeparator = Pattern.quote(valueSeparator);
             this.recordSeparator = Pattern.quote(recordSeparator);
             this.escapeCharacter = Pattern.quote(escapeCharacter);
             this.quoteCharacter = Pattern.quote(quoteCharacter);
+            this.commentCharacter = Pattern.quote(commentCharacter);
         }
     }
 
@@ -102,17 +119,32 @@ public CsvSharpLexer(Configuration configuration) {
         tokenizer.add(TokenType.BEGIN_QUOTE, String.format("%s", configuration.quoteCharacter));
         tokenizer.add(TokenType.VALUE_SEPARATOR, configuration.valueSeparator);
         tokenizer.add(TokenType.RECORD_SEPARATOR, configuration.recordSeparator);
+        if (!configuration.commentCharacter.isEmpty()) {
+            tokenizer.add(TokenType.COMMENT_CHARACTER, configuration.commentCharacter);
+            tokenizer.add(TokenType.COMMENT, configuration.commentCharacter + ".*(?=(\n|$))");
+        }
 
         if (configuration.escapeCharacter.equals(configuration.quoteCharacter)) {
             tokenizer.add(TokenType.END_QUOTE, String.format("%s(?!%s)", configuration.quoteCharacter, configuration.quoteCharacter));
             tokenizer.add(TokenType.ESCAPED_CHARACTER, String.format("(%s%s|%s|%s)+", configuration.quoteCharacter, configuration.quoteCharacter, configuration.valueSeparator, configuration.recordSeparator));
-            tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s])+", configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
+            if (!configuration.commentCharacter.isEmpty()) {
+                tokenizer.add(TokenType.TEXT, String.format("((?!(%s|%s))[^ \f%s%s])+", configuration.commentCharacter, configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
+            } else {
+                tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s])+", configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
+            }
         } else {
             tokenizer.add(TokenType.END_QUOTE, String.format("%s", configuration.quoteCharacter));
             tokenizer.add(TokenType.ESCAPED_CHARACTER, String.format("(%s%s|%s%s|%s|%s)+", configuration.escapeCharacter, configuration.quoteCharacter, configuration.escapeCharacter, configuration.escapeCharacter, configuration.valueSeparator, configuration.recordSeparator));
-            tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s%s])+", configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
+            if (!configuration.commentCharacter.isEmpty()) {
+                tokenizer.add(TokenType.TEXT, String.format("((?!(%s|%s))[^ \f%s%s%s])+", configuration.commentCharacter, configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
+            } else {
+                tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s%s])+", configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
+            }
         }
 
+        initialNextStateTokens = LexerState.Initial.getPossibleTokens().stream()
+                .map(tokenizer::getToken)
+                .collect(Collectors.toList());
         unquotedNextStateTokens = LexerState.Unquoted.getPossibleTokens().stream()
                 .map(tokenizer::getToken)
                 .collect(Collectors.toList());
@@ -126,14 +158,14 @@ public void start(@NotNull CharSequence buffer, int startOffset, int endOffset,
         this.buffer = buffer;
         this.tokenStart = this.tokenEnd = startOffset;
         this.bufferEnd = endOffset;
-        this.currentState = initialState == 0 ? LexerState.Unquoted : LexerState.Quoted;
+        this.currentState = LexerState.values()[initialState];
         this.currentTokenType = null;
     }
 
+    /**
+     * Returns the current lexer state encoded as its {@link LexerState} ordinal, after making
+     * sure the current token has been located. {@code start(...)} decodes the value again via
+     * {@code LexerState.values()[initialState]}, so both sides rely on the enum declaration order.
+     */
     @Override
     public int getState() {
         locateToken();
-        return currentState == LexerState.Unquoted ? 0 : 1;
+        return currentState.ordinal();
     }
 
     @Nullable
@@ -178,6 +210,19 @@ protected void raiseFailure() {
         tokenEnd = bufferEnd;
     }
 
+    /**
+     * Selects the token candidates for the state the lexer is currently in.
+     *
+     * @return the token list prepared in the constructor for {@code currentState}
+     * @throws UnhandledSwitchCaseException if the state has no associated token list
+     */
+    protected Collection<Tokenizer.Token<TokenType>> getCurrentTokenCollection() {
+        final Collection<Tokenizer.Token<TokenType>> candidates;
+        switch (this.currentState) {
+            case Initial:
+                candidates = initialNextStateTokens;
+                break;
+            case Unquoted:
+                candidates = unquotedNextStateTokens;
+                break;
+            case Quoted:
+                candidates = quotedNextStateTokens;
+                break;
+            default:
+                throw new UnhandledSwitchCaseException(this.currentState);
+        }
+        return candidates;
+    }
+
     protected synchronized void locateToken() {
         if (currentTokenType != null) {
             return;
@@ -193,7 +238,7 @@ protected synchronized void locateToken() {
                     tokenizer.findToken(buffer,
                             tokenStart,
                             bufferEnd,
-                            currentState == LexerState.Unquoted ? unquotedNextStateTokens : quotedNextStateTokens,
+                            getCurrentTokenCollection(),
                             null
                     );
 
@@ -222,8 +267,12 @@ protected synchronized void locateToken() {
                     currentTokenType = CsvTypes.COMMA;
                     break;
                 case TEXT:
+                case COMMENT_CHARACTER:
                     currentTokenType = CsvTypes.TEXT;
                     break;
+                case COMMENT:
+                    currentTokenType = CsvTypes.COMMENT;
+                    break;
                 case WHITESPACE:
                     currentTokenType = com.intellij.psi.TokenType.WHITE_SPACE;
                     break;
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/editor/table/CsvTableEditor.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/editor/table/CsvTableEditor.java
@@ -102,7 +102,7 @@ public void setEditable(boolean editable) {
     }
 
     public boolean isEditable() {
-        return this.tableIsEditable && !this.hasErrors();
+        return this.tableIsEditable && !this.hasErrors() && !hasComments();
     }
 
     public CsvColumnInfoMap<PsiElement> getColumnInfoMap() {
@@ -118,6 +118,14 @@ public boolean hasErrors() {
         return (columnInfoMap != null && columnInfoMap.hasErrors());
     }
 
+    /**
+     * Tells whether the column info map of this editor reports comments.
+     *
+     * @return {@code false} if the editor is not valid or no column info map is available,
+     *         otherwise the map's {@code hasComments()} value
+     */
+    public boolean hasComments() {
+        if (isValid()) {
+            CsvColumnInfoMap infoMap = getColumnInfoMap();
+            return infoMap != null && infoMap.hasComments();
+        }
+        return false;
+    }
+
     protected Object[][] storeStateChange(Object[][] data) {
         Object[][] result = this.dataManagement.addState(data);
         saveChanges();
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/highlighter/CsvSyntaxHighlighter.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/highlighter/CsvSyntaxHighlighter.java
@@ -24,6 +24,8 @@ public class CsvSyntaxHighlighter extends SyntaxHighlighterBase {
             createTextAttributesKey("CSV_DEFAULT_STRING", DefaultLanguageHighlighterColors.STRING);
     public static final TextAttributesKey ESCAPED_TEXT =
             createTextAttributesKey("CSV_ESCAPED_STRING", DefaultLanguageHighlighterColors.VALID_STRING_ESCAPE);
+    public static final TextAttributesKey COMMENT =
+            createTextAttributesKey("CSV_DEFAULT_COMMENT", DefaultLanguageHighlighterColors.LINE_COMMENT);
     public static final TextAttributesKey BAD_CHARACTER =
             createTextAttributesKey("CSV_BAD_CHARACTER", HighlighterColors.BAD_CHARACTER);
 
@@ -32,6 +34,7 @@ public class CsvSyntaxHighlighter extends SyntaxHighlighterBase {
     private static final TextAttributesKey[] QUOTE_KEYS = new TextAttributesKey[] {QUOTE};
     private static final TextAttributesKey[] TEXT_KEYS = new TextAttributesKey[] {TEXT};
     private static final TextAttributesKey[] ESCAPED_TEXT_KEYS = new TextAttributesKey[] {ESCAPED_TEXT};
+    private static final TextAttributesKey[] COMMENT_KEYS = new TextAttributesKey[] {COMMENT};
     private static final TextAttributesKey[] EMPTY_KEYS = new TextAttributesKey[0];
 
     private final Project myProject;
@@ -57,6 +60,8 @@ public TextAttributesKey[] getTokenHighlights(IElementType tokenType) {
             return QUOTE_KEYS;
         } else if (tokenType.equals(CsvTypes.TEXT)) {
             return TEXT_KEYS;
+        } else if (tokenType.equals(CsvTypes.COMMENT)) {
+            return COMMENT_KEYS;
         } else if (tokenType.equals(CsvTypes.ESCAPED_TEXT)) {
             return ESCAPED_TEXT_KEYS;
         } else if (tokenType.equals(TokenType.BAD_CHARACTER)) {
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/psi/CsvFile.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/psi/CsvFile.java
@@ -27,6 +27,7 @@ public void propertyChange(PropertyChangeEvent evt) {
             switch (evt.getPropertyName()) {
                 case "defaultEscapeCharacter":
                 case "defaultValueSeparator":
+                case "commentIndicator":
                     FileContentUtilCore.reparseFiles(CsvFile.this.getVirtualFile());
                     break;
                 default:
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvColorSettings.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvColorSettings.java
@@ -36,6 +36,7 @@ public class CsvColorSettings implements ColorSettingsPage {
         attributesDescriptors.add(new AttributesDescriptor("Quote", CsvSyntaxHighlighter.QUOTE));
         attributesDescriptors.add(new AttributesDescriptor("Text", CsvSyntaxHighlighter.TEXT));
         attributesDescriptors.add(new AttributesDescriptor("Escaped Text", CsvSyntaxHighlighter.ESCAPED_TEXT));
+        attributesDescriptors.add(new AttributesDescriptor("Comment", CsvSyntaxHighlighter.COMMENT));
 
         COLUMN_HIGHLIGHT_ATTRIBUTES = new ArrayList<>();
         for (int i = 0; i < MAX_COLUMN_HIGHLIGHT_COLORS; ++i) {
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettings.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettings.java
@@ -32,6 +32,8 @@ public class CsvEditorSettings implements PersistentStateComponent<CsvEditorSett
     public static final CsvEscapeCharacter ESCAPE_CHARACTER_DEFAULT = CsvEscapeCharacter.QUOTE;
     public static final CsvValueSeparator VALUE_SEPARATOR_DEFAULT = CsvValueSeparator.COMMA;
 
+    public static final String COMMENT_INDICATOR_DEFAULT = "#";
+
     private static final CsvEditorSettings STATIC_INSTANCE = new CsvEditorSettings();
 
     public enum EditorPrio {
@@ -63,6 +65,7 @@ public static final class OptionSet {
         @OptionTag(converter = CsvValueSeparator.CsvValueSeparatorConverter.class)
         public CsvValueSeparator DEFAULT_VALUE_SEPARATOR = VALUE_SEPARATOR_DEFAULT;
         public boolean KEEP_TRAILING_SPACES = false;
+        public String COMMENT_INDICATOR = COMMENT_INDICATOR_DEFAULT;
 
         public OptionSet() {
             EditorSettingsExternalizable editorSettingsExternalizable = EditorSettingsExternalizable.getInstance();
@@ -271,4 +274,16 @@ public void setKeepTrailingSpaces(boolean keepTrailingSpaces) {
     public boolean getKeepTrailingSpaces() {
         return getState().KEEP_TRAILING_SPACES;
     }
+
+    /**
+     * Stores the trimmed comment indicator and fires a {@code "commentIndicator"} property
+     * change when the stored value differs from the previous one.
+     *
+     * @param commentIndicator new indicator; surrounding whitespace is removed before storing
+     */
+    public void setCommentIndicator(String commentIndicator) {
+        String oldValue = getCommentIndicator();
+        String newValue = commentIndicator.trim();
+        getState().COMMENT_INDICATOR = newValue;
+        // Compare by content (not by reference) and against the value that was really stored.
+        if (!newValue.equals(oldValue)) {
+            myPropertyChangeSupport.firePropertyChange("commentIndicator", oldValue, newValue);
+        }
+    }
+
+    /**
+     * @return the comment indicator held by the current settings state
+     */
+    public String getCommentIndicator() {
+        return this.getState().COMMENT_INDICATOR;
+    }
 }
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettingsProvider.form b/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettingsProvider.form
diff --git a/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettingsProvider.java b/src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvEditorSettingsProvider.java
diff --git a/src/test/java/net/seesharpsoft/intellij/plugins/csv/parser/CsvParsingTest.java b/src/test/java/net/seesharpsoft/intellij/plugins/csv/parser/CsvParsingTest.java
diff --git a/src/test/resources/parser/CsvWithComments.csv b/src/test/resources/parser/CsvWithComments.csv
diff --git a/src/test/resources/parser/CsvWithComments.txt b/src/test/resources/parser/CsvWithComments.txt

Original file line number	Diff line number	Diff line change
`@@ -19,12 +19,13 @@`
`19`	`19`	`COMMA='regexp:[,:;\|\t]'`
`20`	`20`	`QUOTE='regexp:"'`
`21`	`21`	`CRLF='regexp:\n'`
	`22`	`+ COMMENT='regexp:#.*(\n\|$)'`
`22`	`23`	`]`
`23`	`24`	`}`
`24`	`25`
`25`	`26`	`csvFile ::= record (CRLF record)* [CRLF]`
`26`	`27`
`27`		`-record ::= field (COMMA field)*`
	`28`	`+record ::= (COMMENT \| (field (COMMA field)*))`
`28`	`29`
`29`	`30`	`field ::= (escaped \| nonEscaped)`
`30`	`31`