Skip to content

Commit c75ba07

Browse files
joke1196, ghislainpiot
authored and committed
SONARPY-2057: Markdown content should not leak in the generated file (#1922)
1 parent 054cd8f commit c75ba07

File tree

5 files changed

+411
-85
lines changed

5 files changed

+411
-85
lines changed

sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java

Lines changed: 85 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import com.fasterxml.jackson.core.JsonParser;
2525
import com.fasterxml.jackson.core.JsonToken;
2626
import java.io.IOException;
27-
import java.util.HashMap;
2827
import java.util.LinkedHashMap;
2928
import java.util.Map;
3029
import java.util.Optional;
@@ -50,13 +49,8 @@ private IpynbNotebookParser(PythonInputFile inputFile) {
5049
}
5150

5251
private final PythonInputFile inputFile;
53-
private final StringBuilder aggregatedSource = new StringBuilder();
5452

55-
// Keys are the aggregated source line number
56-
private final Map<Integer, IPythonLocation> locationMap = new HashMap<>();
57-
private int aggregatedSourceLine = 0;
5853
private int lastPythonLine = 0;
59-
private boolean isFirstCell = true;
6054

6155
public Optional<GeneratedIPythonFile> parse() throws IOException {
6256
// If the language is not present, we assume it is a Python notebook
@@ -71,12 +65,9 @@ public Optional<String> parseLanguage() throws IOException {
7165
try (JsonParser jParser = factory.createParser(content)) {
7266
while (!jParser.isClosed()) {
7367
JsonToken jsonToken = jParser.nextToken();
74-
if (JsonToken.FIELD_NAME.equals(jsonToken)) {
75-
String fieldName = jParser.currentName();
76-
if ("language".equals(fieldName)) {
77-
jParser.nextToken();
78-
return Optional.ofNullable(jParser.getValueAsString());
79-
}
68+
if (JsonToken.FIELD_NAME.equals(jsonToken) && "language".equals(jParser.currentName())) {
69+
jParser.nextToken();
70+
return Optional.ofNullable(jParser.getValueAsString());
8071
}
8172
}
8273
}
@@ -87,118 +78,128 @@ public GeneratedIPythonFile parseNotebook() throws IOException {
8778
String content = inputFile.wrappedFile().contents();
8879
JsonFactory factory = new JsonFactory();
8980
try (JsonParser jParser = factory.createParser(content)) {
90-
while (!jParser.isClosed()) {
91-
JsonToken jsonToken = jParser.nextToken();
92-
if (JsonToken.FIELD_NAME.equals(jsonToken)) {
93-
String fieldName = jParser.currentName();
94-
if ("cell_type".equals(fieldName)) {
95-
jParser.nextToken();
96-
if ("code".equals(jParser.getValueAsString())) {
97-
processCodeCell(jParser);
98-
}
99-
}
100-
}
101-
}
102-
// Account for EOF token
103-
addDefaultLocation(lastPythonLine, jParser.currentTokenLocation());
81+
return parseCells(jParser).map(notebookData -> {
82+
// Account for EOF token
83+
JsonLocation location = jParser.currentTokenLocation();
84+
notebookData.addDefaultLocation(lastPythonLine, location.getLineNr(), location.getColumnNr());
85+
return new GeneratedIPythonFile(inputFile.wrappedFile(), notebookData.getAggregatedSource().toString(), notebookData.getLocationMap());
86+
}).orElse(new GeneratedIPythonFile(inputFile.wrappedFile(), "", new LinkedHashMap<>()));
10487
}
10588

106-
return new GeneratedIPythonFile(inputFile.wrappedFile(), aggregatedSource.toString(), locationMap);
10789
}
10890

109-
private void processCodeCell(JsonParser jParser) throws IOException {
91+
private Optional<NotebookParsingData> parseCells(JsonParser parser) throws IOException {
92+
while (!parser.isClosed()) {
93+
parser.nextToken();
94+
String fieldName = parser.currentName();
95+
if ("cells".equals(fieldName)) {
96+
// consume array start token
97+
parser.nextToken();
98+
NotebookParsingData data = parseCellArray(parser);
99+
parser.close();
100+
return Optional.of(data);
101+
}
102+
}
103+
return Optional.empty();
104+
}
110105

111-
while (!jParser.isClosed()) {
112-
JsonToken jsonToken = jParser.nextToken();
113-
if (JsonToken.FIELD_NAME.equals(jsonToken) && "source".equals(jParser.currentName())) {
114-
jsonToken = jParser.nextToken();
115-
if (parseSourceArray(jParser, jsonToken) || parseSourceMultilineString(jParser, jsonToken)) {
116-
break;
117-
} else {
118-
throw new IllegalStateException("Unexpected token: " + jsonToken);
119-
}
106+
private NotebookParsingData parseCellArray(JsonParser jParser) throws IOException {
107+
NotebookParsingData aggregatedNotebookData = NotebookParsingData.empty();
108+
109+
while (jParser.nextToken() != JsonToken.END_ARRAY) {
110+
if (jParser.currentToken() == JsonToken.START_OBJECT) {
111+
processCodeCell(aggregatedNotebookData.getAggregatedSourceLine(), jParser).ifPresent(aggregatedNotebookData::combine);
120112
}
121113
}
114+
aggregatedNotebookData.removeTrailingExtraLine();
115+
return aggregatedNotebookData;
122116
}
123117

124-
private void appendNewLineAfterPreviousCellDelimiter() {
125-
if (!isFirstCell) {
126-
aggregatedSource.append("\n");
127-
} else {
128-
isFirstCell = false;
118+
private static void skipNestedObjects(JsonParser parser) throws IOException {
119+
if (parser.currentToken() == JsonToken.START_OBJECT || parser.currentToken() == JsonToken.START_ARRAY) {
120+
parser.skipChildren();
129121
}
130122
}
131123

132-
private boolean parseSourceArray(JsonParser jParser, JsonToken jsonToken) throws IOException {
133-
if (jsonToken != JsonToken.START_ARRAY) {
134-
return false;
124+
private Optional<NotebookParsingData> processCodeCell(int startLine, JsonParser jParser) throws IOException {
125+
boolean isCodeCell = false;
126+
Optional<NotebookParsingData> notebookData = Optional.empty();
127+
while (jParser.nextToken() != JsonToken.END_OBJECT) {
128+
129+
skipNestedObjects(jParser);
130+
131+
if (JsonToken.FIELD_NAME.equals(jParser.currentToken()) && "cell_type".equals(jParser.currentName())) {
132+
jParser.nextToken();
133+
String cellType = jParser.getValueAsString();
134+
if ("code".equals(cellType)) {
135+
isCodeCell = true;
136+
}
137+
}
138+
if (JsonToken.FIELD_NAME.equals(jParser.currentToken()) && "source".equals(jParser.currentName())) {
139+
jParser.nextToken();
140+
switch (jParser.currentToken()) {
141+
case START_ARRAY:
142+
notebookData = Optional.of(parseSourceArray(startLine, jParser));
143+
break;
144+
case VALUE_STRING:
145+
notebookData = Optional.of(parseSourceMultilineString(startLine, jParser));
146+
break;
147+
default:
148+
throw new IllegalStateException("Unexpected token: " + jParser.currentToken());
149+
}
150+
}
151+
}
152+
if (isCodeCell && notebookData.isPresent()) {
153+
lastPythonLine = notebookData.get().getAggregatedSourceLine();
154+
return notebookData;
135155
}
136-
appendNewLineAfterPreviousCellDelimiter();
156+
return Optional.empty();
157+
}
158+
159+
160+
private static NotebookParsingData parseSourceArray(int startLine, JsonParser jParser) throws IOException {
161+
NotebookParsingData cellData = NotebookParsingData.fromLine(startLine);
137162
JsonLocation tokenLocation = jParser.currentTokenLocation();
138163
// In case of an empty cell, we don't add an extra line
139164
var lastSourceLine = "\n";
140165
while (jParser.nextToken() != JsonToken.END_ARRAY) {
141166
String sourceLine = jParser.getValueAsString();
142167
tokenLocation = jParser.currentTokenLocation();
143-
var countEscapedChar = countEscapeCharacters(sourceLine, new LinkedHashMap<>(), tokenLocation.getColumnNr());
144-
addLineToSource(sourceLine, tokenLocation, countEscapedChar);
168+
var countEscapedChar = countEscapeCharacters(sourceLine, tokenLocation.getColumnNr());
169+
cellData.addLineToSource(sourceLine, tokenLocation.getLineNr(), tokenLocation.getColumnNr(), countEscapedChar);
145170
lastSourceLine = sourceLine;
146171
}
147172
if (!lastSourceLine.endsWith("\n")) {
148-
aggregatedSource.append("\n");
173+
cellData.appendToSource("\n");
149174
}
150175
// Account for the last cell delimiter
151-
addDelimiterToSource(tokenLocation);
152-
lastPythonLine = aggregatedSourceLine;
153-
return true;
176+
cellData.addDelimiterToSource(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n", tokenLocation.getLineNr(), tokenLocation.getColumnNr());
177+
return cellData;
154178
}
155179

156-
private boolean parseSourceMultilineString(JsonParser jParser, JsonToken jsonToken) throws IOException {
157-
if (jsonToken != JsonToken.VALUE_STRING) {
158-
return false;
159-
}
160-
appendNewLineAfterPreviousCellDelimiter();
180+
private static NotebookParsingData parseSourceMultilineString(int startLine, JsonParser jParser) throws IOException {
181+
NotebookParsingData cellData = NotebookParsingData.fromLine(startLine);
161182
String sourceLine = jParser.getValueAsString();
162183
JsonLocation tokenLocation = jParser.currentTokenLocation();
163184
var previousLen = 0;
164185
var previousExtraChars = 0;
165186

166187
for (String line : sourceLine.lines().toList()) {
167-
var countEscapedChar = countEscapeCharacters(line, new LinkedHashMap<>(), previousLen + previousExtraChars + tokenLocation.getColumnNr());
188+
var countEscapedChar = countEscapeCharacters(line, previousLen + previousExtraChars + tokenLocation.getColumnNr());
168189
var currentCount = countEscapedChar.get(-1);
169-
addLineToSource(line, new IPythonLocation(tokenLocation.getLineNr(),
190+
cellData.addLineToSource(line, new IPythonLocation(tokenLocation.getLineNr(),
170191
tokenLocation.getColumnNr() + previousLen + previousExtraChars, countEscapedChar));
171-
aggregatedSource.append("\n");
192+
cellData.appendToSource("\n");
172193
previousLen = previousLen + line.length() + 2;
173194
previousExtraChars = previousExtraChars + currentCount;
174195
}
175196
// Account for the last cell delimiter
176-
addDelimiterToSource(tokenLocation);
177-
lastPythonLine = aggregatedSourceLine;
178-
return true;
179-
}
180-
181-
private void addLineToSource(String sourceLine, JsonLocation tokenLocation, Map<Integer, Integer> colOffset) {
182-
addLineToSource(sourceLine, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr(), colOffset));
183-
}
184-
185-
private void addLineToSource(String sourceLine, IPythonLocation location) {
186-
aggregatedSource.append(sourceLine);
187-
aggregatedSourceLine++;
188-
locationMap.put(aggregatedSourceLine, location);
189-
}
190-
191-
private void addDelimiterToSource(JsonLocation tokenLocation) {
192-
aggregatedSource.append(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER);
193-
aggregatedSourceLine++;
194-
addDefaultLocation(aggregatedSourceLine, tokenLocation);
195-
}
196-
197-
private void addDefaultLocation(int line, JsonLocation tokenLocation) {
198-
locationMap.putIfAbsent(line, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr(), Map.of(-1, 0)));
197+
cellData.addDelimiterToSource(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n", tokenLocation.getLineNr(), tokenLocation.getColumnNr());
198+
return cellData;
199199
}
200200

201-
private static Map<Integer, Integer> countEscapeCharacters(String sourceLine, Map<Integer, Integer> colMap, int colOffSet) {
201+
private static Map<Integer, Integer> countEscapeCharacters(String sourceLine, int colOffSet) {
202+
Map<Integer, Integer> colMap = new LinkedHashMap<>();
202203
int count = 0;
203204
var numberOfExtraChars = 0;
204205
var arr = sourceLine.toCharArray();
@@ -221,5 +222,4 @@ private static Map<Integer, Integer> countEscapeCharacters(String sourceLine, Ma
221222
colMap.put(-1, numberOfExtraChars);
222223
return colMap;
223224
}
224-
225225
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.plugins.python;
21+
22+
import java.util.LinkedHashMap;
23+
import java.util.Map;
24+
import java.util.Objects;
25+
import org.sonar.python.IPythonLocation;
26+
27+
public class NotebookParsingData {
28+
29+
private StringBuilder aggregatedSource;
30+
31+
private Map<Integer, IPythonLocation> locationMap;
32+
33+
private Integer aggregatedSourceLine;
34+
35+
public NotebookParsingData(StringBuilder aggregatedSource, Map<Integer, IPythonLocation> locationMap, Integer aggregatedSourceLine) {
36+
this.aggregatedSource = aggregatedSource;
37+
// Keys are the aggregated source line number
38+
this.locationMap = locationMap;
39+
this.aggregatedSourceLine = aggregatedSourceLine;
40+
}
41+
42+
public static NotebookParsingData fromLine(int line) {
43+
return new NotebookParsingData(new StringBuilder(), new LinkedHashMap<>(), line);
44+
}
45+
46+
public static NotebookParsingData empty() {
47+
return new NotebookParsingData(new StringBuilder(), new LinkedHashMap<>(), 0);
48+
}
49+
50+
public StringBuilder getAggregatedSource() {
51+
return aggregatedSource;
52+
}
53+
54+
public Map<Integer, IPythonLocation> getLocationMap() {
55+
return locationMap;
56+
}
57+
58+
public Integer getAggregatedSourceLine() {
59+
return aggregatedSourceLine;
60+
}
61+
62+
public void combine(NotebookParsingData other) {
63+
aggregatedSource.append(other.aggregatedSource);
64+
aggregatedSourceLine = other.aggregatedSourceLine;
65+
locationMap.putAll(other.locationMap);
66+
}
67+
68+
public void appendToSource(String str) {
69+
aggregatedSource.append(str);
70+
}
71+
72+
public void addLineToSource(String sourceLine, int lineNr, int columnNr, Map<Integer, Integer> colOffset) {
73+
addLineToSource(sourceLine, new IPythonLocation(lineNr, columnNr, colOffset));
74+
}
75+
76+
private void appendLine(String line) {
77+
aggregatedSource.append(line);
78+
aggregatedSourceLine++;
79+
}
80+
81+
public void addLineToSource(String sourceLine, IPythonLocation location) {
82+
appendLine(sourceLine);
83+
locationMap.put(aggregatedSourceLine, location);
84+
}
85+
86+
public void addDelimiterToSource(String delimiter, int lineNr, int columnNr) {
87+
appendLine(delimiter);
88+
addDefaultLocation(aggregatedSourceLine, lineNr, columnNr);
89+
}
90+
91+
public void addDefaultLocation(int line, int lineNr, int columnNr) {
92+
locationMap.putIfAbsent(line, new IPythonLocation(lineNr, columnNr, Map.of(-1, 0)));
93+
}
94+
95+
public void removeTrailingExtraLine() {
96+
if (!aggregatedSource.isEmpty() && aggregatedSource.charAt(aggregatedSource.length() - 1) == '\n') {
97+
aggregatedSource.deleteCharAt(aggregatedSource.length() - 1);
98+
}
99+
}
100+
101+
@Override
102+
public boolean equals(Object o) {
103+
if (this == o) {
104+
return true;
105+
}
106+
if (o == null || getClass() != o.getClass()) {
107+
return false;
108+
}
109+
NotebookParsingData that = (NotebookParsingData) o;
110+
return aggregatedSource.toString().contentEquals(that.aggregatedSource) &&
111+
Objects.equals(locationMap, that.locationMap) &&
112+
Objects.equals(aggregatedSourceLine, that.aggregatedSourceLine);
113+
}
114+
115+
@Override
116+
public int hashCode() {
117+
return Objects.hash(aggregatedSource, locationMap, aggregatedSourceLine);
118+
}
119+
}

0 commit comments

Comments
 (0)