diff --git a/CHANGELOG.md b/CHANGELOG.md index ac00ec7c3..2e18f4ad0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Support for multiline string literals. - Support for numeric literals prefixed by ampersands. - Support for identifiers prefixed by more than 2 ampersands. +- **API:** `TextLiteralNode::isMultiline` method. +- **API:** `TextLiteralNode::getValue` method, which returns the effective contents of a text + literal. + +### Changed + +- `TextLiteralNode::getImage` now returns the text literal exactly as it appears in source code. +- `TextLiteralNode::getImageWithoutQuotes` now simply calls the new `getValue` method. ### Deprecated +- `TextLiteralNode::getImageWithoutQuotes`, use `getValue` instead. - `DelphiTokenType.AMPERSAND`, as `&` is now lexed directly into numeric literals and identifiers. ### Fixed diff --git a/delphi-checks/src/main/java/au/com/integradev/delphi/checks/AbstractFormatArgumentCheck.java b/delphi-checks/src/main/java/au/com/integradev/delphi/checks/AbstractFormatArgumentCheck.java index 31308c022..93f675b99 100644 --- a/delphi-checks/src/main/java/au/com/integradev/delphi/checks/AbstractFormatArgumentCheck.java +++ b/delphi-checks/src/main/java/au/com/integradev/delphi/checks/AbstractFormatArgumentCheck.java @@ -73,7 +73,7 @@ private void checkViolation(NameReferenceNode nameReference, DelphiCheckContext return; } - String rawFormatString = textLiteral.get().getImageWithoutQuotes().toString(); + String rawFormatString = textLiteral.get().getValue(); FormatStringParser parser = new FormatStringParser(rawFormatString); try { checkFormatStringViolation(parser.parse(), arrayConstructor.get(), context); diff --git a/delphi-checks/src/main/java/au/com/integradev/delphi/checks/StringLiteralRegularExpressionCheck.java b/delphi-checks/src/main/java/au/com/integradev/delphi/checks/StringLiteralRegularExpressionCheck.java index c68430bf5..6a7b1e923 100644 --- a/delphi-checks/src/main/java/au/com/integradev/delphi/checks/StringLiteralRegularExpressionCheck.java +++ b/delphi-checks/src/main/java/au/com/integradev/delphi/checks/StringLiteralRegularExpressionCheck.java @@ -59,7 +59,7 @@ public void start(DelphiCheckContext context) { @Override public DelphiCheckContext visit(TextLiteralNode string, DelphiCheckContext context) { - if (pattern != null && pattern.matcher(string.getImageWithoutQuotes()).matches()) { + if (pattern != null && pattern.matcher(string.getValue()).matches()) { reportIssue(context, string, message); } return super.visit(string, context); diff --git a/delphi-frontend/src/main/antlr3/au/com/integradev/delphi/antlr/Delphi.g b/delphi-frontend/src/main/antlr3/au/com/integradev/delphi/antlr/Delphi.g index fb10cb6b0..0fa382e03 100644 --- a/delphi-frontend/src/main/antlr3/au/com/integradev/delphi/antlr/Delphi.g +++ b/delphi-frontend/src/main/antlr3/au/com/integradev/delphi/antlr/Delphi.g @@ -55,6 +55,7 @@ tokens { TkPrimaryExpression; TkNestedExpression; TkTextLiteral; + TkMultilineString; TkNameDeclaration; TkNameReference; TkUnitImport; @@ -149,6 +150,43 @@ package au.com.integradev.delphi.antlr; super(message, cause); } } + + private int lookaheadMultilineString() { + int startQuotes = lookaheadSingleQuotes(1); + if (startQuotes >= 3 && (startQuotes & 1) != 0 && isNewLine(input.LA(startQuotes + 1))) { + int i = startQuotes; + while (true) { + switch (input.LA(++i)) { + case '\'': + int quotes = Math.min(startQuotes, lookaheadSingleQuotes(i)); + i += quotes; + if (quotes == startQuotes) { + return i; + } + break; + + case EOF: + return 0; + + default: + // do nothing + } + } + } + return 0; + } + + private int lookaheadSingleQuotes(int i) { + int result = 0; + while (input.LA(i++) == '\'') { + ++result; + } + return result; + } + + private static boolean isNewLine(int c) { + return c == '\r' || c == '\n'; + } } @parser::members { @@ -719,11 +757,14 @@ expressionOrRangeList : (expressionOrRange (','!)?)+ ; exprOrRangeOrAnonMethodList : (exprOrRangeOrAnonMethod (','!)?)+ ; -textLiteral : textLiteral_ -> ^(TkTextLiteral textLiteral_) +textLiteral : singleLineTextLiteral -> ^(TkTextLiteral singleLineTextLiteral) + | multilineTextLiteral -> ^(TkTextLiteral multilineTextLiteral) ; -textLiteral_ : TkQuotedString (escapedCharacter+ TkQuotedString)* escapedCharacter* +singleLineTextLiteral : TkQuotedString (escapedCharacter+ TkQuotedString)* escapedCharacter* | escapedCharacter+ (TkQuotedString escapedCharacter+)* TkQuotedString? ; +multilineTextLiteral : TkMultilineString + ; escapedCharacter : TkCharacterEscapeCode | '^' (TkIdentifier | TkIntNumber | TkAnyChar) -> ^({changeTokenType(TkEscapedCharacter)}) ; @@ -1179,7 +1220,16 @@ TkAsmId : { asmMode }? => '@' '@'? (Alpha | '_' | Digit)+ ; TkAsmHexNum : { asmMode }? => HexDigitSeq ('h'|'H') ; -TkQuotedString : '\'' ('\'\'' | ~('\''))* '\'' +TkQuotedString @init { int multilineStringRemaining = lookaheadMultilineString(); } + : '\'' + ({ multilineStringRemaining != 0 }? => { + int i = multilineStringRemaining - 1; + while (--i > 0) { + matchAny(); + } + $type = TkMultilineString; + })? + ({ multilineStringRemaining == 0 }? => ('\'\'' | ~('\''))* '\'')? ; TkAsmDoubleQuotedString : { asmMode }? => '"' (~('\"'))* '"' ; diff --git a/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java b/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java index 5b2d66c0e..1ff4f4cd1 100644 --- a/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java +++ b/delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java @@ -19,14 +19,20 @@ package au.com.integradev.delphi.antlr.ast.node; import au.com.integradev.delphi.antlr.ast.visitors.DelphiParserVisitor; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.stream.Collectors; import org.antlr.runtime.Token; +import org.apache.commons.lang3.StringUtils; import org.sonar.plugins.communitydelphi.api.ast.DelphiNode; import org.sonar.plugins.communitydelphi.api.ast.TextLiteralNode; +import org.sonar.plugins.communitydelphi.api.token.DelphiTokenType; import org.sonar.plugins.communitydelphi.api.type.IntrinsicType; import org.sonar.plugins.communitydelphi.api.type.Type; public final class TextLiteralNodeImpl extends DelphiNodeImpl implements TextLiteralNode { private String image; + private String value; public TextLiteralNodeImpl(Token token) { super(token); @@ -44,7 +50,7 @@ public T accept(DelphiParserVisitor visitor, T data) { @Override public Type getType() { IntrinsicType intrinsic = - (getImageWithoutQuotes().length() == 1) ? IntrinsicType.CHAR : IntrinsicType.STRING; + (getValue().length() == 1) ? IntrinsicType.CHAR : IntrinsicType.STRING; return getTypeFactory().getIntrinsic(intrinsic); } @@ -52,42 +58,122 @@ public Type getType() { @Override public String getImage() { if (image == null) { - StringBuilder imageBuilder = new StringBuilder("'"); - for (DelphiNode child : getChildren()) { - switch (child.getTokenType()) { - case QUOTED_STRING: - String withoutQuotes = getStringWithoutQuotes(child.getImage()).toString(); - String stringImage = withoutQuotes.replace("''", "'"); - imageBuilder.append(stringImage); - break; - - case CHARACTER_ESCAPE_CODE: - String escapedChar = child.getImage(); - boolean isHex = escapedChar.startsWith("#$"); - escapedChar = escapedChar.substring(isHex ? 2 : 1); - imageBuilder.append((char) Integer.parseInt(escapedChar, isHex ? 16 : 10)); - break; - - case ESCAPED_CHARACTER: - imageBuilder.append(child.getImage()); - break; - - default: - // Do nothing - } - } - imageBuilder.append("'"); - image = imageBuilder.toString(); + image = + getChildren().stream() + .map( + child -> { + String result = child.getImage(); + if (child.getTokenType() == DelphiTokenType.ESCAPED_CHARACTER) { + result = '^' + result; + } + return result; + }) + .collect(Collectors.joining()); } return image; } + @Override + public String getValue() { + if (value == null) { + value = createValue(); + } + return value; + } + + @SuppressWarnings("removal") @Override public CharSequence getImageWithoutQuotes() { - return getStringWithoutQuotes(getImage()); + return getValue(); + } + + private String createValue() { + if (isMultiline()) { + return createMultilineValue(); + } else { + return createSingleLineValue(); + } + } + + private String createMultilineValue() { + Deque lines = + getChild(0).getImage().lines().collect(Collectors.toCollection(ArrayDeque::new)); + + lines.removeFirst(); + + String last = lines.removeLast(); + String indentation = readLeadingWhitespace(last); + + return lines.stream() + .map(line -> StringUtils.removeStart(line, indentation)) + .collect(Collectors.joining("\n")); + } + + private static String readLeadingWhitespace(String input) { + StringBuilder result = new StringBuilder(); + for (int i = 0; i < input.length(); ++i) { + char c = input.charAt(i); + if (c <= 0x20 || c == 0x3000) { + result.append(c); + } else { + break; + } + } + return result.toString(); } - private static CharSequence getStringWithoutQuotes(String string) { - return string.subSequence(1, string.length() - 1); + private String createSingleLineValue() { + StringBuilder imageBuilder = new StringBuilder(); + + for (DelphiNode child : getChildren()) { + switch (child.getTokenType()) { + case QUOTED_STRING: + String stringImage = child.getImage(); + stringImage = stringImage.substring(1, stringImage.length() - 1); + stringImage = stringImage.replace("''", "'"); + imageBuilder.append(stringImage); + break; + + case CHARACTER_ESCAPE_CODE: + imageBuilder.append(characterEscapeToChar(child.getImage())); + break; + + case ESCAPED_CHARACTER: + imageBuilder.append((char) ((child.getImage().charAt(0) + 64) % 128)); + break; + + default: + // Do nothing + } + } + + return imageBuilder.toString(); + } + + private static char characterEscapeToChar(String image) { + image = image.substring(1); + int radix = 10; + + switch (image.charAt(0)) { + case '$': + radix = 16; + image = image.substring(1); + break; + case '%': + radix = 2; + image = image.substring(1); + break; + default: + // do nothing + } + + image = StringUtils.remove(image, '_'); + + return (char) Integer.parseInt(image, radix); + } + + @Override + public boolean isMultiline() { + return getChild(0).getTokenType() == DelphiTokenType.MULTILINE_STRING; } } diff --git a/delphi-frontend/src/main/java/org/sonar/plugins/communitydelphi/api/ast/TextLiteralNode.java b/delphi-frontend/src/main/java/org/sonar/plugins/communitydelphi/api/ast/TextLiteralNode.java index 6dd434a6b..af3cb7ae9 100644 --- a/delphi-frontend/src/main/java/org/sonar/plugins/communitydelphi/api/ast/TextLiteralNode.java +++ b/delphi-frontend/src/main/java/org/sonar/plugins/communitydelphi/api/ast/TextLiteralNode.java @@ -21,5 +21,26 @@ import org.sonar.plugins.communitydelphi.api.type.Typed; public interface TextLiteralNode extends DelphiNode, Typed { + /** + * Returns the evaluated value of the text literal. + * + * @return evaluated value of the text literal + * @deprecated Use {@link TextLiteralNode#getValue} instead. + */ + @Deprecated(forRemoval = true) CharSequence getImageWithoutQuotes(); + + /** + * Returns the evaluated value of the text literal. + * + * @return evaluated value of the text literal + */ + String getValue(); + + /** + * Returns whether this is a multiline text literal. + * + * @return true if this is a multiline text literal + */ + boolean isMultiline(); } diff --git a/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/GrammarTest.java b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/GrammarTest.java index 0af7c7317..45059e4a9 100644 --- a/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/GrammarTest.java +++ b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/GrammarTest.java @@ -65,6 +65,21 @@ private void assertParsed(String fileName) { } } + @Test + void testMultilineStrings() { + assertParsed("MultilineStrings.pas"); + } + + @Test + void testMultilineLookalikeStrings() { + assertParsed("MultilineLookalikeStrings.pas"); + } + + @Test + void testMultilineInvalidButAcceptedStrings() { + assertParsed("MultilineInvalidButAcceptedStrings.pas"); + } + @Test void testEmptyBeginStatement() { assertParsed("EmptyProcs.pas"); diff --git a/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java new file mode 100644 index 000000000..e2a3ed0a8 --- /dev/null +++ b/delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java @@ -0,0 +1,85 @@ +/* + * Sonar Delphi Plugin + * Copyright (C) 2024 Integrated Application Development + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 + */ +package au.com.integradev.delphi.antlr.ast.node; + +import static org.assertj.core.api.Assertions.assertThat; + +import au.com.integradev.delphi.antlr.DelphiLexer; +import org.antlr.runtime.CommonToken; +import org.junit.jupiter.api.Test; +import org.sonar.plugins.communitydelphi.api.ast.DelphiNode; + +class TextLiteralNodeImplTest { + @Test + void testMultilineImage() { + String image = + "'''\n" // + + " Foo\n" + + " Bar\n" + + " Baz\n" + + " '''"; + + TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral); + node.addChild(createNode(DelphiLexer.TkMultilineString, image)); + + assertThat(node.getImage()).isEqualTo(image); + assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("Foo\nBar\nBaz"); + assertThat(node.isMultiline()).isTrue(); + } + + @Test + void testGetImageWithCharacterEscapes() { + TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'F'")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111")); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$61")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$72")); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01100001")); + node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01111010")); + + assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#%01100001#%01111010"); + assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("FooBarBaz"); + assertThat(node.isMultiline()).isFalse(); + } + + @Test + void testGetImageWithCaretNotation() { + TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'F'")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, "/")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, "/")); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, "!")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, "2")); + node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, "!")); + node.addChild(createNode(DelphiLexer.TkEscapedCharacter, ":")); + + assertThat(node.getImage()).isEqualTo("'F'^/^/'B'^!^2'B'^!^:"); + assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("FooBarBaz"); + assertThat(node.isMultiline()).isFalse(); + } + + private static DelphiNode createNode(int tokenType, String image) { + return new CommonDelphiNodeImpl(new CommonToken(tokenType, image)); + } +} diff --git a/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineInvalidButAcceptedStrings.pas b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineInvalidButAcceptedStrings.pas new file mode 100644 index 000000000..3a1178bc6 --- /dev/null +++ b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineInvalidButAcceptedStrings.pas @@ -0,0 +1,19 @@ +unit MultilineInvalidButAcceptedStrings; + +interface + +const + Foo = ''' + '''; + + Bar = ''' + bar + '''; + + Bar = ''''' + baz + flarp'''''; + +implementation + +end. \ No newline at end of file diff --git a/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineLookalikeStrings.pas b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineLookalikeStrings.pas new file mode 100644 index 000000000..71cb8a6d8 --- /dev/null +++ b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineLookalikeStrings.pas @@ -0,0 +1,14 @@ +unit MultilineLookalikeStrings; + +interface + +const + Foo = ''' Hello this is a foo text '''; + Bar = ''''' Hello this is a ''bar'' text '''''; + Baz = ''''''''' Hello this is a''''baz''''text '''''''''; + Flarp = ''''; + Boop = ''' Hello this is a boop text '; + +implementation + +end. \ No newline at end of file diff --git a/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineStrings.pas b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineStrings.pas new file mode 100644 index 000000000..3b6f48e81 --- /dev/null +++ b/delphi-frontend/src/test/resources/au/com/integradev/delphi/grammar/MultilineStrings.pas @@ -0,0 +1,23 @@ +unit MultilineStrings; + +interface + +const + Foo = ''' + Hello this is a + foo text +'''; + Bar = ''''' + Hello this is a + 'bar' + text +'''''; + Baz = ''''''''' + Hello this is a + '''baz''' + text +'''''''''; + +implementation + +end. \ No newline at end of file