Skip to content

Commit 3690cb2

Browse files
authored
SONARPY-2068: Tabulations (\t) should be handled correctly as an escape character (#2050)
* SONARPY-2068 add testcase for TokenEnricher * SONARPY-2068 store how many chars are skipped in the hashmap * SONARPY-2068 consider \t, \b, \f as two char characters * SONARPY-2068 refactoring/simplifying * SONARPY-2068 update tests * SONARPY-2068 change Map<Integer, Integer> to list of ColumnMapping * SONARPY-2068 fix review
1 parent 40449b9 commit 3690cb2

File tree

15 files changed

+269
-109
lines changed

15 files changed

+269
-109
lines changed
Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python;
21+
22+
public record EscapeCharPositionInfo(int columnInIpynbFile, int numberOfExtraChars) {
23+
}

python-frontend/src/main/java/org/sonar/python/IPythonLocation.java

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,14 @@
1919
*/
2020
package org.sonar.python;
2121

22-
import java.util.Map;
22+
import java.util.List;
2323

24-
public record IPythonLocation(int line, int column, Map<Integer, Integer> colOffset, boolean isCompresssed) {
25-
public IPythonLocation(int line, int column, Map<Integer, Integer> colOffset) {
26-
this(line, column, colOffset, false);
24+
public record IPythonLocation(int line, int column, List<EscapeCharPositionInfo> colOffsets, boolean isCompresssed) {
25+
public IPythonLocation(int line, int column, List<EscapeCharPositionInfo> colOffsets) {
26+
this(line, column, colOffsets, false);
27+
}
28+
29+
public IPythonLocation(int line, int column) {
30+
this(line, column, List.of(), false);
2731
}
2832
}

python-frontend/src/main/java/org/sonar/python/tree/TokenEnricher.java

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import java.util.Map;
2525
import java.util.Set;
2626
import org.sonar.plugins.python.api.tree.Trivia;
27+
import org.sonar.python.EscapeCharPositionInfo;
2728
import org.sonar.python.IPythonLocation;
2829

2930
public class TokenEnricher {
@@ -43,47 +44,55 @@ public static TokenImpl enrichToken(Token token, Map<Integer, IPythonLocation> o
4344
if (location == null) {
4445
throw new IllegalStateException(String.format("No IPythonLocation found for line %s", token.getLine()));
4546
}
46-
Map<Integer, Integer> escapeCharsMap = location.colOffset();
47-
int startCol = computeColWithEscapes(token.getColumn(), escapeCharsMap, location.column());
48-
int escapedCharInToken = computeEscapeCharsInToken(token.getValue());
47+
List<EscapeCharPositionInfo> escapeCharPositionInfos = location.colOffsets();
48+
int startCol = token.getColumn();
49+
int endCol = token.getColumn() + token.getValue().length();
50+
int ipynbStartCol = computeColWithEscapes(location.column(), startCol, escapeCharPositionInfos);
51+
int escapedCharInToken = computeEscapeCharsInToken(escapeCharPositionInfos, startCol, endCol);
4952
List<Trivia> trivia = token.getTrivia().stream()
50-
.map(t -> computeTriviaLocation(t, location.line(), startCol, token.getLine(), offsetMap))
53+
.map(t -> computeTriviaLocation(t, location.line(), ipynbStartCol, token.getLine(), offsetMap))
5154
.toList();
5255

53-
return new TokenImpl(token, location.line(), startCol, escapedCharInToken, trivia, location.isCompresssed());
56+
return new TokenImpl(token, location.line(), ipynbStartCol, escapedCharInToken, trivia, location.isCompresssed());
5457
}
5558
return new TokenImpl(token);
5659
}
5760

58-
private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine, Map<Integer, IPythonLocation> offsetMap) {
59-
int escapedCharInToken = computeEscapeCharsInToken(trivia.getToken().getValue());
61+
private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine,
62+
Map<Integer, IPythonLocation> offsetMap) {
6063
var line = parentLine;
64+
int escapedCharInToken = computeEscapeCharsInTrivia(trivia, offsetMap);
6165
var col = parentCol - escapedCharInToken - trivia.getToken().getValue().length();
6266
var isCompressed = false;
6367
if (parentPythonLine != trivia.getToken().getLine()) {
6468
IPythonLocation location = offsetMap.get(trivia.getToken().getLine());
6569
line = location.line();
66-
Map<Integer, Integer> escapeCharsMap = location.colOffset();
67-
col = computeColWithEscapes(trivia.getToken().getColumn(), escapeCharsMap, location.column());
70+
List<EscapeCharPositionInfo> escapeCharPositionInfos = location.colOffsets();
71+
col = computeColWithEscapes(location.column(), trivia.getToken().getColumn(), escapeCharPositionInfos);
6872
isCompressed = location.isCompresssed();
6973
}
7074
return new TriviaImpl(new TokenImpl(trivia.getToken(), line, col,
7175
escapedCharInToken, List.of(), isCompressed));
7276
}
7377

74-
private static int computeEscapeCharsInToken(String tokenValue) {
75-
int escapedCharInToken = 0;
76-
for (int i = 0; i < tokenValue.length(); i++) {
77-
if (ESCAPED_CHARS.contains(tokenValue.charAt(i))) {
78-
escapedCharInToken++;
79-
}
80-
}
81-
return escapedCharInToken;
78+
private static int computeColWithEscapes(int offsetColumn, int currentCol, List<EscapeCharPositionInfo> escapeCharPositionInfos) {
79+
int escapedCharsOffset = computeEscapeCharsInToken(escapeCharPositionInfos, 0, currentCol);
80+
return offsetColumn + currentCol + escapedCharsOffset;
81+
}
8282

83+
private static int computeEscapeCharsInTrivia(com.sonar.sslr.api.Trivia trivia, Map<Integer, IPythonLocation> offsetMap) {
84+
IPythonLocation location = offsetMap.get(trivia.getToken().getLine());
85+
Token token = trivia.getToken();
86+
int startCol = token.getColumn();
87+
int endCol = token.getColumn() + token.getValue().length();
88+
return computeEscapeCharsInToken(location.colOffsets(), startCol, endCol);
8389
}
8490

85-
private static int computeColWithEscapes(int currentCol, Map<Integer, Integer> escapes, int offsetColumn) {
86-
return (int) escapes.keySet().stream().filter(k -> k > 0 && k < currentCol).count() + offsetColumn + currentCol;
91+
private static int computeEscapeCharsInToken(List<EscapeCharPositionInfo> escapeCharPositionInfos, int startCol, int endCol) {
92+
return escapeCharPositionInfos.stream()
93+
.filter(entry -> entry.columnInIpynbFile() >= startCol && entry.columnInIpynbFile() < endCol)
94+
.mapToInt(EscapeCharPositionInfo::numberOfExtraChars)
95+
.sum();
8796
}
8897

8998
}

python-frontend/src/test/java/org/sonar/python/FileLinesVisitorTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ void notebook_locs_single_line_file() {
7676
def foo():
7777
return 3
7878
""";
79-
var locations = Map.of(1, new IPythonLocation(1, 383, Map.of(-1, 0)),
80-
2, new IPythonLocation(1, 390, Map.of(-1, 0)),
81-
3, new IPythonLocation(1, 402, Map.of(-1, 0)),
82-
4, new IPythonLocation(1, 402, Map.of(-1, 0)));
79+
var locations = Map.of(1, new IPythonLocation(1, 383),
80+
2, new IPythonLocation(1, 390),
81+
3, new IPythonLocation(1, 402),
82+
4, new IPythonLocation(1, 402));
8383
TestPythonVisitorRunner.scanNotebookFile(new File(BASE_DIR, "notebook_locs_single_line.ipynb"), locations, content, visitor);
8484
assertThat(visitor.getExecutableLines()).isEmpty();
8585
assertThat(visitor.getLinesOfCode()).hasSize(3);

python-frontend/src/test/java/org/sonar/python/PythonTestUtils.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.nio.file.Files;
2424
import java.util.ArrayList;
2525
import java.util.List;
26+
import java.util.Map;
2627
import java.util.function.Predicate;
2728
import javax.annotation.CheckForNull;
2829
import org.mockito.Mockito;
@@ -196,4 +197,11 @@ public static Symbol lastSymbolFromDef(String... code) {
196197
}
197198
return ((FunctionDef) tree).name().symbol();
198199
}
200+
201+
public static List<EscapeCharPositionInfo> mapToColumnMappingList(Map<Integer, Integer> map) {
202+
return map.entrySet().stream()
203+
.sorted(Map.Entry.comparingByKey())
204+
.map(entry -> new EscapeCharPositionInfo(entry.getKey(), entry.getValue()))
205+
.toList();
206+
}
199207
}

python-frontend/src/test/java/org/sonar/python/tree/IPythonTreeMakerTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@
3131
import org.sonar.plugins.python.api.tree.LineMagic;
3232
import org.sonar.plugins.python.api.tree.Statement;
3333
import org.sonar.plugins.python.api.tree.Tree;
34+
import org.sonar.python.EscapeCharPositionInfo;
3435
import org.sonar.python.IPythonLocation;
3536
import org.sonar.python.api.PythonGrammar;
3637
import org.sonar.python.parser.RuleTest;
3738

3839
import static org.assertj.core.api.Assertions.assertThat;
3940
import static org.assertj.core.api.Assertions.assertThatThrownBy;
41+
import static org.sonar.python.PythonTestUtils.mapToColumnMappingList;
4042

4143
class IPythonTreeMakerTest extends RuleTest {
4244

@@ -308,7 +310,8 @@ void assignmentRhs() {
308310

309311
@Test
310312
void enrichTokens() {
311-
var offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(4, 15, 8, 20, -1, 2)));
313+
List<EscapeCharPositionInfo> colOffsets = mapToColumnMappingList(Map.of(4, 1, 8, 1));
314+
var offsetMap = Map.of(1, new IPythonLocation(7, 10, colOffsets));
312315
var statementList = parseIPython(
313316
"a = \"123\"", new IPythonTreeMaker(offsetMap)::fileInput).statements();
314317
assertThat(statementList).isNotNull();
@@ -319,7 +322,7 @@ void enrichTokens() {
319322
assertThat(stringLiteral.get(0).firstToken().line()).isEqualTo(7);
320323
assertThat(stringLiteral.get(0).firstToken().column()).isEqualTo(14);
321324

322-
offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(-1, 0)), 2, new IPythonLocation(8, 10, Map.of(-1, 0)));
325+
offsetMap = Map.of(1, new IPythonLocation(7, 10), 2, new IPythonLocation(8, 10));
323326
statementList = parseIPython(
324327
"def foo(): # comment \n pass", new IPythonTreeMaker(offsetMap)::fileInput).statements();
325328
assertThat(statementList).isNotNull();

python-frontend/src/test/java/org/sonar/python/tree/TokenEnricherTest.java

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import com.sonar.sslr.api.Token;
2323
import com.sonar.sslr.impl.Lexer;
24-
import java.util.LinkedHashMap;
2524
import java.util.List;
2625
import java.util.Map;
2726
import org.junit.jupiter.api.BeforeAll;
@@ -32,6 +31,7 @@
3231

3332
import static org.assertj.core.api.Assertions.assertThat;
3433
import static org.junit.jupiter.api.Assertions.assertThrows;
34+
import static org.sonar.python.PythonTestUtils.mapToColumnMappingList;
3535

3636
class TokenEnricherTest {
3737
private static TestLexer lexer;
@@ -78,8 +78,8 @@ void shouldThrowIllegalStateException() {
7878
//when the mapping is not present for the current line
7979
var code = "a = 1\n\nb=3";
8080
var offsetMap = Map.of(
81-
1, new IPythonLocation(200, 23, Map.of()),
82-
2, new IPythonLocation(201, 23, Map.of()));
81+
1, new IPythonLocation(200, 23),
82+
2, new IPythonLocation(201, 23));
8383
var originalTokens = lexer.lex(code);
8484
Throwable throwable = assertThrows(IllegalStateException.class, () -> TokenEnricher.enrichTokens(originalTokens, offsetMap));
8585
assertThat(throwable.getMessage()).isEqualTo("No IPythonLocation found for line 3");
@@ -89,26 +89,29 @@ void shouldThrowIllegalStateException() {
8989
void shouldProvideOffsetForEscapeChar() {
9090
var code = "a = \"1\"";
9191
var expectedTokens = lexer.lex(code);
92-
var escapedChars = new LinkedHashMap<Integer, Integer>();
93-
escapedChars.put(4, 305);
94-
escapedChars.put(6, 308);
92+
var escapedChars = mapToColumnMappingList(Map.of(4, 1, 6, 1));
9593
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
9694
var stringToken = tokens.get(2);
9795
assertThat(stringToken.line()).isEqualTo(100);
9896
assertThat(stringToken.column()).isEqualTo(304);
9997
assertThat(stringToken.includedEscapeChars()).isEqualTo(2);
98+
99+
var eofToken = tokens.get(3);
100+
assertThat(eofToken.line()).isEqualTo(100);
101+
assertThat(eofToken.column()).isEqualTo(309);
100102
}
101103

102104
@Test
103105
void shouldComputeColCorrectly() {
104106
var code = "a = f\"{b} \\n test\" + \"1\"";
105107
var expectedTokens = lexer.lex(code);
106-
var escapedChars = new LinkedHashMap<Integer, Integer>();
107-
escapedChars.put(5, 305);
108-
escapedChars.put(10, 311);
109-
escapedChars.put(17, 319);
110-
escapedChars.put(21, 324);
111-
escapedChars.put(23, 327);
108+
var escapedChars = mapToColumnMappingList(Map.ofEntries(
109+
Map.entry(5, 1),
110+
Map.entry(10, 1),
111+
Map.entry(17, 1),
112+
Map.entry(21, 1),
113+
Map.entry(23, 1)
114+
));
112115
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
113116
var stringToken = tokens.get(tokens.size() - 2);
114117
assertThat(stringToken.line()).isEqualTo(100);
@@ -121,11 +124,33 @@ void shouldComputeColCorrectly() {
121124
assertThat(eofToken.includedEscapeChars()).isZero();
122125
}
123126

127+
@Test
128+
void shouldComputeTabColCorrectly() {
129+
var code = "\ta";
130+
var expectedTokens = lexer.lex(code);
131+
var escapedChars = mapToColumnMappingList(Map.of(0, 1));
132+
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
133+
var tabToken = tokens.get(0);
134+
assertThat(tabToken.line()).isEqualTo(100);
135+
assertThat(tabToken.column()).isEqualTo(300);
136+
assertThat(tabToken.includedEscapeChars()).isEqualTo(1);
137+
138+
var idToken = tokens.get(1);
139+
assertThat(idToken.line()).isEqualTo(100);
140+
assertThat(idToken.column()).isEqualTo(302);
141+
assertThat(idToken.includedEscapeChars()).isZero();
142+
143+
var eofToken = tokens.get(2);
144+
assertThat(eofToken.line()).isEqualTo(100);
145+
assertThat(eofToken.column()).isEqualTo(303);
146+
assertThat(eofToken.includedEscapeChars()).isZero();
147+
}
148+
124149
@Test
125150
void shouldComputeColCorrectlyForTrivia() {
126151
var code = "a = 3 # comment";
127152
var expectedTokens = lexer.lex(code);
128-
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0))));
153+
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300)));
129154
var trivias = tokens.get(tokens.size() - 1).trivia();
130155
assertThat(trivias).hasSize(1);
131156
assertThat(trivias.get(0).token().line()).isEqualTo(100);
@@ -137,7 +162,8 @@ void shouldComputeColCorrectlyForTrivia() {
137162
void shouldComputeColCorrectlyForTriviaWithEscapeChar() {
138163
var code = "a = 3 # test\\n";
139164
var expectedTokens = lexer.lex(code);
140-
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 1, 12, 13))));
165+
var escapedChars = mapToColumnMappingList(Map.of(12, 1));
166+
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
141167
var trivias = tokens.get(tokens.size() - 1).trivia();
142168
assertThat(trivias).hasSize(1);
143169
assertThat(trivias.get(0).token().line()).isEqualTo(100);
@@ -149,7 +175,8 @@ void shouldComputeColCorrectlyForTriviaWithEscapeChar() {
149175
void shouldComputeColCorrectlyForTriviaOnDifferentLine() {
150176
var code = "# comment\na = 3";
151177
var expectedTokens = lexer.lex(code);
152-
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0)), 2, new IPythonLocation(101, 300, Map.of(-1, 0))));
178+
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300), 2,
179+
new IPythonLocation(101, 300)));
153180
assertThat(tokens.get(0).line()).isEqualTo(101);
154181
var trivias = tokens.get(0).trivia();
155182
assertThat(trivias).hasSize(1);
@@ -162,10 +189,7 @@ void shouldComputeColCorrectlyForTriviaOnDifferentLine() {
162189
void shouldComputeCorrectlyForSingleQuote() {
163190
var code = "a = '1'";
164191
var expectedTokens = lexer.lex(code);
165-
var escapedChars = new LinkedHashMap<Integer, Integer>();
166-
escapedChars.put(4, 305);
167-
escapedChars.put(6, 308);
168-
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
192+
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300)));
169193
var stringToken = tokens.get(2);
170194
assertThat(stringToken.line()).isEqualTo(100);
171195
assertThat(stringToken.column()).isEqualTo(304);

0 commit comments

Comments
 (0)