Skip to content

Commit 1f51905

Browse files
committed
SONARPY-2003 Implement basic notebook parser with support for multiline string code cells (#1861)
1 parent e8e658a commit 1f51905

File tree

3 files changed

+55
-9
lines changed

3 files changed

+55
-9
lines changed

sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ private IpynbNotebookParser(PythonInputFile inputFile) {
4444
}
4545

4646
private final PythonInputFile inputFile;
47-
private StringBuilder aggregatedSource = new StringBuilder();
47+
private final StringBuilder aggregatedSource = new StringBuilder();
4848

4949
// Keys are the aggregated source line numbers
5050
private final Map<Integer, IPythonLocation> locationMap = new HashMap<>();
@@ -76,7 +76,7 @@ private void processCodeCell(JsonParser jParser) throws IOException {
7676
JsonToken jsonToken = jParser.nextToken();
7777
if (JsonToken.FIELD_NAME.equals(jsonToken) && "source".equals(jParser.currentName())) {
7878
jsonToken = jParser.nextToken();
79-
if (parseSourceArray(jParser, jsonToken)) {
79+
if (parseSourceArray(jParser, jsonToken) || parseSourceMultilineString(jParser, jsonToken)) {
8080
break;
8181
} else {
8282
throw new IllegalStateException("Unexpected token: " + jsonToken);
@@ -91,16 +91,38 @@ private boolean parseSourceArray(JsonParser jParser, JsonToken jsonToken) throws
9191
}
9292
while (jParser.nextToken() != JsonToken.END_ARRAY) {
9393
String sourceLine = jParser.getValueAsString();
94-
JsonLocation tokenLocation = jParser.currentTokenLocation();
94+
addLineToSource(sourceLine, jParser.currentTokenLocation());
95+
}
96+
// Account for the last cell delimiter
97+
addDelimiterToSource();
98+
return true;
99+
}
100+
101+
private boolean parseSourceMultilineString(JsonParser jParser, JsonToken jsonToken) throws IOException {
102+
if (jsonToken != JsonToken.VALUE_STRING) {
103+
return false;
104+
}
105+
String sourceLine = jParser.getValueAsString();
106+
JsonLocation tokenLocation = jParser.currentTokenLocation();
95107

96-
aggregatedSource.append(sourceLine);
97-
locationMap.put(aggregatedSourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr()));
98-
aggregatedSourceLine++;
108+
for (String line : sourceLine.lines().toList()) {
109+
aggregatedSource.append(line);
110+
addLineToSource("\n", tokenLocation);
99111
}
100112
// Account for the last cell delimiter
113+
addDelimiterToSource();
114+
return true;
115+
}
116+
117+
private void addLineToSource(String sourceLine, JsonLocation tokenLocation) {
118+
aggregatedSource.append(sourceLine);
119+
locationMap.put(aggregatedSourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr()));
120+
aggregatedSourceLine++;
121+
}
122+
123+
private void addDelimiterToSource() {
101124
aggregatedSource.append(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER);
102125
aggregatedSourceLine++;
103-
return true;
104126
}
105127

106128
public record ParseResult(PythonInputFile inputFile, String aggregatedSource, Map<Integer, IPythonLocation> locationMap) {

sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ void testParseNotebook() {
3737

3838
IpynbNotebookParser.ParseResult result = IpynbNotebookParser.parseNotebook(inputFile);
3939

40-
assertThat(result.locationMap().keySet()).hasSize(12);
40+
assertThat(result.locationMap().keySet()).hasSize(18);
4141
assertThat(StringUtils.countMatches(result.aggregatedSource(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER))
42-
.isEqualTo(3);
42+
.isEqualTo(6);
4343
}
4444

4545
@Test

sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook.ipynb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,30 @@
5858
"source": [
5959
"x = 42"
6060
]
61+
},
62+
{
63+
"cell_type": "code",
64+
"source": "#Some code\nprint(\"hello world\\n\")",
65+
"execution_count": 0,
66+
"outputs": [],
67+
"metadata": {}
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"print(\"My\\ntext\")\n",
76+
"print(\"Something else\\n\")"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": "print(\"My\\ntext\")\nprint(\"Something else\\n\")"
6185
}
6286
],
6387
"metadata": {

0 commit comments

Comments
 (0)