Skip to content

Commit e8e658a

Browse files
committed
SONARPY-2002 Implement basic notebook parser with support for array code cells (#1860)
1 parent ba231cf commit e8e658a

File tree

5 files changed

+298
-1
lines changed

5 files changed

+298
-1
lines changed

sonar-python-plugin/pom.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@
109109
<artifactId>junit-jupiter-api</artifactId>
110110
<scope>test</scope>
111111
</dependency>
112+
<dependency>
113+
<groupId>com.fasterxml.jackson.core</groupId>
114+
<artifactId>jackson-databind</artifactId>
115+
<version>2.17.1</version>
116+
</dependency>
112117
</dependencies>
113118

114119
<build>
@@ -182,7 +187,7 @@
182187
<configuration>
183188
<rules>
184189
<requireFilesSize>
185-
<maxsize>15000000</maxsize>
190+
<maxsize>17000000</maxsize>
186191
<minsize>1000000</minsize>
187192
<files>
188193
<file>${project.build.directory}/${project.build.finalName}.jar</file>
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.plugins.python;
21+
22+
import com.fasterxml.jackson.core.JsonFactory;
23+
import com.fasterxml.jackson.core.JsonLocation;
24+
import com.fasterxml.jackson.core.JsonParser;
25+
import com.fasterxml.jackson.core.JsonToken;
26+
import java.io.IOException;
27+
import java.util.HashMap;
28+
import java.util.Map;
29+
30+
public class IpynbNotebookParser {
31+
32+
public static final String SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER = "\n#SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER\n";
33+
34+
public static ParseResult parseNotebook(PythonInputFile inputFile) {
35+
try {
36+
return new IpynbNotebookParser(inputFile).parseNotebook();
37+
} catch (IOException e) {
38+
throw new IllegalStateException("Cannot read " + inputFile, e);
39+
}
40+
}
41+
42+
private IpynbNotebookParser(PythonInputFile inputFile) {
43+
this.inputFile = inputFile;
44+
}
45+
46+
private final PythonInputFile inputFile;
47+
private StringBuilder aggregatedSource = new StringBuilder();
48+
49+
// Keys are the aggregated source line number
50+
private final Map<Integer, IPythonLocation> locationMap = new HashMap<>();
51+
private int aggregatedSourceLine = 1;
52+
53+
public ParseResult parseNotebook() throws IOException {
54+
String content = inputFile.wrappedFile().contents();
55+
JsonFactory factory = new JsonFactory();
56+
try (JsonParser jParser = factory.createParser(content)) {
57+
while (!jParser.isClosed()) {
58+
JsonToken jsonToken = jParser.nextToken();
59+
if (JsonToken.FIELD_NAME.equals(jsonToken)) {
60+
String fieldName = jParser.currentName();
61+
if ("cell_type".equals(fieldName)) {
62+
jParser.nextToken();
63+
if ("code".equals(jParser.getValueAsString())) {
64+
processCodeCell(jParser);
65+
}
66+
}
67+
}
68+
}
69+
}
70+
71+
return new ParseResult(inputFile, aggregatedSource.toString(), locationMap);
72+
}
73+
74+
private void processCodeCell(JsonParser jParser) throws IOException {
75+
while (!jParser.isClosed()) {
76+
JsonToken jsonToken = jParser.nextToken();
77+
if (JsonToken.FIELD_NAME.equals(jsonToken) && "source".equals(jParser.currentName())) {
78+
jsonToken = jParser.nextToken();
79+
if (parseSourceArray(jParser, jsonToken)) {
80+
break;
81+
} else {
82+
throw new IllegalStateException("Unexpected token: " + jsonToken);
83+
}
84+
}
85+
}
86+
}
87+
88+
private boolean parseSourceArray(JsonParser jParser, JsonToken jsonToken) throws IOException {
89+
if (jsonToken != JsonToken.START_ARRAY) {
90+
return false;
91+
}
92+
while (jParser.nextToken() != JsonToken.END_ARRAY) {
93+
String sourceLine = jParser.getValueAsString();
94+
JsonLocation tokenLocation = jParser.currentTokenLocation();
95+
96+
aggregatedSource.append(sourceLine);
97+
locationMap.put(aggregatedSourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr()));
98+
aggregatedSourceLine++;
99+
}
100+
// Account for the last cell delimiter
101+
aggregatedSource.append(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER);
102+
aggregatedSourceLine++;
103+
return true;
104+
}
105+
106+
public record ParseResult(PythonInputFile inputFile, String aggregatedSource, Map<Integer, IPythonLocation> locationMap) {
107+
}
108+
109+
public record IPythonLocation(int line, int column) {
110+
}
111+
112+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.plugins.python;
21+
22+
import java.io.File;
23+
import org.junit.jupiter.api.Test;
24+
import org.sonar.api.batch.fs.InputFile;
25+
import org.sonar.api.internal.apachecommons.lang.StringUtils;
26+
27+
import static org.assertj.core.api.Assertions.assertThat;
28+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
29+
import static org.sonar.plugins.python.TestUtils.createInputFile;
30+
31+
class IpynbNotebookParserTest {
32+
private final File baseDir = new File("src/test/resources/org/sonar/plugins/python").getAbsoluteFile();
33+
34+
@Test
35+
void testParseNotebook() {
36+
var inputFile = createInputFile(baseDir, "notebook.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN);
37+
38+
IpynbNotebookParser.ParseResult result = IpynbNotebookParser.parseNotebook(inputFile);
39+
40+
assertThat(result.locationMap().keySet()).hasSize(12);
41+
assertThat(StringUtils.countMatches(result.aggregatedSource(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER))
42+
.isEqualTo(3);
43+
}
44+
45+
@Test
46+
void testParseInvalidNotebook() {
47+
var inputFile = createInputFile(baseDir, "invalid_notebook.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN);
48+
49+
assertThatThrownBy(() -> IpynbNotebookParser.parseNotebook(inputFile))
50+
.isInstanceOf(IllegalStateException.class)
51+
.hasMessageContaining("Unexpected token");
52+
}
53+
54+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"hello world\n"
13+
]
14+
}
15+
],
16+
"source": {
17+
"something": 1
18+
}
19+
}
20+
],
21+
"metadata": {
22+
"kernelspec": {
23+
"display_name": "jupyter-experiment_venv",
24+
"language": "python",
25+
"name": "python3"
26+
},
27+
"language_info": {
28+
"codemirror_mode": {
29+
"name": "ipython",
30+
"version": 3
31+
},
32+
"file_extension": ".py",
33+
"mimetype": "text/x-python",
34+
"name": "python",
35+
"nbconvert_exporter": "python",
36+
"pygments_lexer": "ipython3",
37+
"version": "3.12.2"
38+
}
39+
},
40+
"nbformat": 4,
41+
"nbformat_minor": 2
42+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"hello world\n"
13+
]
14+
}
15+
],
16+
"source": [
17+
"x = None\n",
18+
"if x is not None:\n",
19+
" print \"not none\"\n",
20+
"\n",
21+
"\n",
22+
"def foo():\n",
23+
" x = 42\n",
24+
" x = 17\n",
25+
" print(x)"
26+
]
27+
},
28+
{
29+
"cell_type": "markdown",
30+
"metadata": {},
31+
"source": [
32+
"# Hello\n",
33+
"This is some markdown"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"metadata": {},
39+
"source": [
40+
"This is another markdown cell"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": null,
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"if x is not None:\n",
50+
" print(\"hello\")"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"x = 42"
60+
]
61+
}
62+
],
63+
"metadata": {
64+
"kernelspec": {
65+
"display_name": "jupyter-experiment_venv",
66+
"language": "python",
67+
"name": "python3"
68+
},
69+
"language_info": {
70+
"codemirror_mode": {
71+
"name": "ipython",
72+
"version": 3
73+
},
74+
"file_extension": ".py",
75+
"mimetype": "text/x-python",
76+
"name": "python",
77+
"nbconvert_exporter": "python",
78+
"pygments_lexer": "ipython3",
79+
"version": "3.12.2"
80+
}
81+
},
82+
"nbformat": 4,
83+
"nbformat_minor": 2
84+
}

0 commit comments

Comments
 (0)