Skip to content

Commit 1ad9936

Browse files
authored
SONARPY-625 Avoid throwing when highlighting symbols in nested f-string (#654)
1 parent f8b8262 commit 1ad9936

File tree

19 files changed

+373
-249
lines changed

19 files changed

+373
-249
lines changed

python-frontend/src/main/java/org/sonar/python/api/PythonGrammar.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
*/
2020
package org.sonar.python.api;
2121

22+
import com.sonar.sslr.api.GenericTokenType;
2223
import com.sonar.sslr.api.Grammar;
2324
import org.sonar.sslr.grammar.GrammarRuleKey;
2425
import org.sonar.sslr.grammar.LexerfulGrammarBuilder;
@@ -83,6 +84,7 @@ public enum PythonGrammar implements GrammarRuleKey {
8384

8485
NAMED_EXPR_TEST,
8586
FORMATTED_EXPR,
87+
F_STRING_CONTENT,
8688

8789
COMPARISON,
8890
COMP_OPERATOR,
@@ -199,7 +201,16 @@ public static void grammar(LexerfulGrammarBuilder b) {
199201

200202
b.rule(STAR_EXPR).is("*", EXPR);
201203
b.rule(EXPR).is(XOR_EXPR, b.zeroOrMore("|", XOR_EXPR));
202-
b.rule(FORMATTED_EXPR).is(b.sequence(EXPR, b.optional(PythonPunctuator.ASSIGN)));
204+
205+
// https://docs.python.org/3/reference/lexical_analysis.html#formatted-string-literals
206+
b.rule(F_STRING_CONTENT).is(b.zeroOrMore(b.firstOf(GenericTokenType.UNKNOWN_CHAR, FORMATTED_EXPR)));
207+
b.rule(FORMATTED_EXPR).is(
208+
PythonPunctuator.LCURLYBRACE,
209+
EXPR,
210+
b.optional(PythonPunctuator.ASSIGN),
211+
b.optional("!", b.firstOf("s", "r", "a")),
212+
b.optional(":", b.oneOrMore(b.firstOf(FORMATTED_EXPR, b.anyTokenButNot(PythonPunctuator.RCURLYBRACE)))),
213+
PythonPunctuator.RCURLYBRACE);
203214

204215
b.rule(FACTOR).is(b.firstOf(
205216
b.sequence(b.firstOf("+", "-", "~"), FACTOR),
python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2020 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.lexer;
21+
22+
import com.sonar.sslr.api.GenericTokenType;
23+
import com.sonar.sslr.api.Token;
24+
import com.sonar.sslr.impl.Lexer;
25+
import org.sonar.sslr.channel.Channel;
26+
import org.sonar.sslr.channel.CodeReader;
27+
28+
/**
29+
* A channel to handle the literal_char parts inside f-strings.
30+
* See https://docs.python.org/3/reference/lexical_analysis.html#f-strings
31+
*/
32+
public class FStringChannel extends Channel<Lexer> {
33+
34+
private static final char EOF = (char) -1;
35+
36+
private final LexerState lexerState;
37+
private final StringBuilder sb = new StringBuilder();
38+
39+
public FStringChannel(LexerState lexerState) {
40+
this.lexerState = lexerState;
41+
}
42+
43+
@Override
44+
public boolean consume(CodeReader code, Lexer output) {
45+
setInitialLineAndColumn(code);
46+
if (code.charAt(0) == '#') {
47+
// disable comments
48+
addUnknownCharToken("#", output, code.getLinePosition(), code.getColumnPosition());
49+
code.pop();
50+
return true;
51+
}
52+
if (lexerState.brackets == 0) {
53+
int line = code.getLinePosition();
54+
int column = code.getColumnPosition();
55+
while (code.charAt(0) != EOF) {
56+
char c = code.charAt(0);
57+
if (c != '{') {
58+
sb.append((char) code.pop());
59+
} else if (code.charAt(1) == '{') {
60+
sb.append((char) code.pop());
61+
sb.append((char) code.pop());
62+
} else {
63+
break;
64+
}
65+
}
66+
if (sb.length() != 0) {
67+
addUnknownCharToken(sb.toString(), output, line, column);
68+
sb.setLength(0);
69+
return true;
70+
}
71+
}
72+
return false;
73+
}
74+
75+
private static void addUnknownCharToken(String value, Lexer output, int line, int column) {
76+
output.addToken(Token.builder()
77+
.setType(GenericTokenType.UNKNOWN_CHAR)
78+
.setValueAndOriginalValue(value)
79+
.setURI(output.getURI())
80+
.setLine(line)
81+
.setColumn(column)
82+
.build());
83+
}
84+
85+
private void setInitialLineAndColumn(CodeReader code) {
86+
if (code.getLinePosition() == 1 && code.getColumnPosition() == 0) {
87+
code.setLinePosition(lexerState.initialLine);
88+
code.setColumnPosition(lexerState.initialColumn);
89+
}
90+
}
91+
}

python-frontend/src/main/java/org/sonar/python/lexer/LexerState.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ public class LexerState {
2828

2929
int brackets;
3030
boolean joined;
31+
int initialLine = 1;
32+
int initialColumn = 0;
3133

3234
public void reset() {
3335
indentationStack.clear();
@@ -37,4 +39,9 @@ public void reset() {
3739
joined = false;
3840
}
3941

42+
public void reset(int initialLine, int initialColumn) {
43+
reset();
44+
this.initialLine = initialLine;
45+
this.initialColumn = initialColumn;
46+
}
4047
}

python-frontend/src/main/java/org/sonar/python/lexer/PythonLexer.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import com.sonar.sslr.impl.channel.IdentifierAndKeywordChannel;
2525
import com.sonar.sslr.impl.channel.PunctuatorChannel;
2626
import com.sonar.sslr.impl.channel.UnknownCharacterChannel;
27-
import java.nio.charset.Charset;
2827
import org.sonar.python.api.PythonKeyword;
2928
import org.sonar.python.api.PythonPunctuator;
3029
import org.sonar.python.api.PythonTokenType;
@@ -47,11 +46,21 @@ public final class PythonLexer {
4746
private PythonLexer() {
4847
}
4948

50-
public static Lexer create(Charset charset, LexerState lexerState) {
51-
return Lexer.builder()
52-
.withCharset(charset)
53-
.withFailIfNoChannelToConsumeOneCharacter(true)
49+
public static Lexer create(LexerState lexerState) {
50+
Lexer.Builder builder = Lexer.builder().withFailIfNoChannelToConsumeOneCharacter(true);
51+
addCommonChannels(builder, lexerState);
52+
return builder.build();
53+
}
54+
55+
public static Lexer fStringLexer(LexerState lexerState) {
56+
Lexer.Builder builder = Lexer.builder().withFailIfNoChannelToConsumeOneCharacter(true);
57+
builder.withChannel(new FStringChannel(lexerState));
58+
addCommonChannels(builder, lexerState);
59+
return builder.build();
60+
}
5461

62+
private static void addCommonChannels(Lexer.Builder builder, LexerState lexerState) {
63+
builder
5564
.withChannel(new NewLineChannel(lexerState))
5665

5766
.withChannel(new IndentationChannel(lexerState))
@@ -95,8 +104,6 @@ public static Lexer create(Charset charset, LexerState lexerState) {
95104
// http://docs.python.org/reference/lexical_analysis.html#delimiters
96105
.withChannel(new PunctuatorChannel(PythonPunctuator.values()))
97106

98-
.withChannel(new UnknownCharacterChannel())
99-
100-
.build();
107+
.withChannel(new UnknownCharacterChannel());
101108
}
102109
}
python-frontend/src/main/java/org/sonar/python/parser/FStringParser.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2020 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.parser;
21+
22+
import com.sonar.sslr.api.AstNode;
23+
import com.sonar.sslr.api.Grammar;
24+
import com.sonar.sslr.api.Token;
25+
import com.sonar.sslr.impl.Lexer;
26+
import com.sonar.sslr.impl.Parser;
27+
import java.util.List;
28+
import org.sonar.python.api.PythonGrammar;
29+
import org.sonar.python.lexer.LexerState;
30+
import org.sonar.python.lexer.PythonLexer;
31+
import org.sonar.python.tree.StringElementImpl;
32+
import org.sonar.python.tree.TokenImpl;
33+
34+
public class FStringParser {
35+
36+
private final LexerState lexerState;
37+
private final Lexer lexer;
38+
private final Parser<Grammar> internalParser = Parser.builder(PythonGrammar.create()).build();
39+
40+
public FStringParser() {
41+
this.lexerState = new LexerState();
42+
this.lexer = PythonLexer.fStringLexer(lexerState);
43+
this.internalParser.setRootRule(internalParser.getGrammar().rule(PythonGrammar.F_STRING_CONTENT));
44+
}
45+
46+
public List<AstNode> fStringExpressions(Token fStringToken) {
47+
StringElementImpl element = new StringElementImpl(new TokenImpl(fStringToken));
48+
String literalValue = element.trimmedQuotesValue();
49+
lexerState.reset(fStringToken.getLine(), fStringToken.getColumn() + element.contentStartIndex());
50+
lexer.lex(literalValue);
51+
List<Token> tokens = lexer.getTokens();
52+
AstNode astNode = internalParser.parse(tokens);
53+
return astNode.getChildren(PythonGrammar.FORMATTED_EXPR);
54+
}
55+
56+
}

python-frontend/src/main/java/org/sonar/python/parser/PythonParser.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import com.sonar.sslr.impl.Lexer;
2727
import com.sonar.sslr.impl.Parser;
2828
import com.sonar.sslr.impl.matcher.RuleDefinition;
29-
import java.nio.charset.StandardCharsets;
3029
import java.util.ArrayList;
3130
import java.util.List;
3231
import org.sonar.python.api.PythonGrammar;
@@ -74,8 +73,7 @@ private SslrPythonParser() {
7473
super(PythonGrammar.create());
7574
super.setRootRule(super.getGrammar().getRootRule());
7675
this.lexerState = new LexerState();
77-
// We don't expose a method to parse anything else than a String, so we don't need to have a configurable charset
78-
this.lexer = PythonLexer.create(StandardCharsets.UTF_8, lexerState);
76+
this.lexer = PythonLexer.create(lexerState);
7977
}
8078

8179
@Override

python-frontend/src/main/java/org/sonar/python/tree/LineOffsetCounter.java

Lines changed: 0 additions & 89 deletions
This file was deleted.

0 commit comments

Comments
 (0)