Skip to content

Commit 7794c9a

Browse files
committed
SONARPY-2004 Track escaped characters correctly in notebook parser (#1869)
1 parent 932a86e commit 7794c9a

File tree

3 files changed

+65
-9
lines changed

3 files changed

+65
-9
lines changed

sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.fasterxml.jackson.core.JsonToken;
2626
import java.io.IOException;
2727
import java.util.HashMap;
28+
import java.util.LinkedHashMap;
2829
import java.util.Map;
2930

3031
public class IpynbNotebookParser {
@@ -91,7 +92,9 @@ private boolean parseSourceArray(JsonParser jParser, JsonToken jsonToken) throws
9192
}
9293
while (jParser.nextToken() != JsonToken.END_ARRAY) {
9394
String sourceLine = jParser.getValueAsString();
94-
addLineToSource(sourceLine, jParser.currentTokenLocation());
95+
var tokenLocation = jParser.currentTokenLocation();
96+
var countEscapedChar = countEscapeCharacters(sourceLine, new LinkedHashMap<>(), tokenLocation.getColumnNr());
97+
addLineToSource(sourceLine, tokenLocation, countEscapedChar);
9598
}
9699
// Account for the last cell delimiter
97100
addDelimiterToSource();
@@ -104,19 +107,29 @@ private boolean parseSourceMultilineString(JsonParser jParser, JsonToken jsonTok
104107
}
105108
String sourceLine = jParser.getValueAsString();
106109
JsonLocation tokenLocation = jParser.currentTokenLocation();
110+
var previousLen = 0;
111+
var previousExtraChars = 0;
107112

108113
for (String line : sourceLine.lines().toList()) {
109-
aggregatedSource.append(line);
110-
addLineToSource("\n", tokenLocation);
114+
var countEscapedChar = countEscapeCharacters(line, new LinkedHashMap<>(), tokenLocation.getColumnNr());
115+
var currentCount = countEscapedChar.get(-1);
116+
addLineToSource(line, new IPythonLocation(tokenLocation.getLineNr(),
117+
tokenLocation.getColumnNr() + previousLen + previousExtraChars + 1, countEscapedChar));
118+
previousLen = line.length() + 2;
119+
previousExtraChars = currentCount;
111120
}
112121
// Account for the last cell delimiter
113122
addDelimiterToSource();
114123
return true;
115124
}
116125

117-
private void addLineToSource(String sourceLine, JsonLocation tokenLocation) {
126+
private void addLineToSource(String sourceLine, JsonLocation tokenLocation, Map<Integer, Integer> colOffset) {
127+
addLineToSource(sourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr(), colOffset));
128+
}
129+
130+
private void addLineToSource(String sourceLine, IPythonLocation location) {
118131
aggregatedSource.append(sourceLine);
119-
locationMap.put(aggregatedSourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr()));
132+
locationMap.put(aggregatedSourceLine, location);
120133
aggregatedSourceLine++;
121134
}
122135

@@ -125,10 +138,36 @@ private void addDelimiterToSource() {
125138
aggregatedSourceLine++;
126139
}
127140

141+
private static Map<Integer, Integer> countEscapeCharacters(String sourceLine, Map<Integer, Integer> colMap, int colOffSet) {
142+
int count = 0;
143+
var numberOfExtraChars = 0;
144+
var arr = sourceLine.toCharArray();
145+
for (int i = 1; i < sourceLine.length(); ++i) {
146+
char c = arr[i];
147+
switch (c) {
148+
case '"', '\'', '/':
149+
// + 1 as we do have to count the open quote.
150+
colMap.put(i, i + colOffSet + count + 1);
151+
if (c != '/') {
152+
numberOfExtraChars++;
153+
}
154+
break;
155+
case '\\', '\b', '\f', '\n', '\r', '\t':
156+
count += 2;
157+
numberOfExtraChars++;
158+
break;
159+
default:
160+
break;
161+
}
162+
}
163+
colMap.put(-1, numberOfExtraChars);
164+
return colMap;
165+
}
166+
128167
public record ParseResult(PythonInputFile inputFile, String aggregatedSource, Map<Integer, IPythonLocation> locationMap) {
129168
}
130169

131-
public record IPythonLocation(int line, int column) {
170+
public record IPythonLocation(int line, int column, Map<Integer, Integer> colOffset) {
132171
}
133172

134173
}

sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.sonar.plugins.python;
2121

2222
import java.io.File;
23+
import java.util.Map;
2324
import org.junit.jupiter.api.Test;
2425
import org.sonar.api.batch.fs.InputFile;
2526
import org.sonar.api.internal.apachecommons.lang.StringUtils;
@@ -35,11 +36,20 @@ class IpynbNotebookParserTest {
3536
void testParseNotebook() {
3637
var inputFile = createInputFile(baseDir, "notebook.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN);
3738

38-
IpynbNotebookParser.ParseResult result = IpynbNotebookParser.parseNotebook(inputFile);
39+
var result = IpynbNotebookParser.parseNotebook(inputFile);
3940

40-
assertThat(result.locationMap().keySet()).hasSize(18);
41+
assertThat(result.locationMap().keySet()).hasSize(20);
4142
assertThat(StringUtils.countMatches(result.aggregatedSource(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER))
42-
.isEqualTo(6);
43+
.isEqualTo(7);
44+
assertThat(result.locationMap()).extracting(map -> map.get(17)).isEqualTo(new IpynbNotebookParser.IPythonLocation(64, 27, Map.of(6, 21, 20, 37, -1, 3)));
45+
46+
// The wrapped file changes the lines of the notebook
47+
assertThat(result.locationMap()).extracting(map -> map.get(22)).isEqualTo(new IpynbNotebookParser.IPythonLocation(84, 15, Map.of(6, 21, 15, 32, -1, 3)));
48+
assertThat(result.locationMap()).extracting(map -> map.get(23)).isEqualTo(new IpynbNotebookParser.IPythonLocation(84, 37, Map.of(6, 21, 23, 40, -1, 3)));
49+
50+
assertThat(result.locationMap()).extracting(map -> map.get(25))
51+
.isEqualTo(new IpynbNotebookParser.IPythonLocation(91, 15, Map.of(4, 19, 39, 62, 41, 64, 42, 65, 46, 71, -1, 7)));
52+
assertThat(result.locationMap()).extracting(map -> map.get(26)).isEqualTo(new IpynbNotebookParser.IPythonLocation(91, 71, Map.of(-1, 0)));
4353
}
4454

4555
@Test

sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook.ipynb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@
8282
"metadata": {},
8383
"outputs": [],
8484
"source": "print(\"My\\ntext\")\nprint(\"Something else\\n\")"
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": 1,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": "a = \"A bunch of characters \\n \\t \\f \\r / // \\ \"\nb = None"
8592
}
8693
],
8794
"metadata": {

0 commit comments

Comments
 (0)