24
24
import com .fasterxml .jackson .core .JsonParser ;
25
25
import com .fasterxml .jackson .core .JsonToken ;
26
26
import java .io .IOException ;
27
- import java .util .HashMap ;
28
27
import java .util .LinkedHashMap ;
29
28
import java .util .Map ;
30
29
import java .util .Optional ;
@@ -50,13 +49,8 @@ private IpynbNotebookParser(PythonInputFile inputFile) {
50
49
}
51
50
52
51
private final PythonInputFile inputFile ;
53
- private final StringBuilder aggregatedSource = new StringBuilder ();
54
52
55
- // Keys are the aggregated source line number
56
- private final Map <Integer , IPythonLocation > locationMap = new HashMap <>();
57
- private int aggregatedSourceLine = 0 ;
58
53
private int lastPythonLine = 0 ;
59
- private boolean isFirstCell = true ;
60
54
61
55
public Optional <GeneratedIPythonFile > parse () throws IOException {
62
56
// If the language is not present, we assume it is a Python notebook
@@ -71,12 +65,9 @@ public Optional<String> parseLanguage() throws IOException {
71
65
try (JsonParser jParser = factory .createParser (content )) {
72
66
while (!jParser .isClosed ()) {
73
67
JsonToken jsonToken = jParser .nextToken ();
74
- if (JsonToken .FIELD_NAME .equals (jsonToken )) {
75
- String fieldName = jParser .currentName ();
76
- if ("language" .equals (fieldName )) {
77
- jParser .nextToken ();
78
- return Optional .ofNullable (jParser .getValueAsString ());
79
- }
68
+ if (JsonToken .FIELD_NAME .equals (jsonToken ) && "language" .equals (jParser .currentName ())) {
69
+ jParser .nextToken ();
70
+ return Optional .ofNullable (jParser .getValueAsString ());
80
71
}
81
72
}
82
73
}
@@ -87,118 +78,128 @@ public GeneratedIPythonFile parseNotebook() throws IOException {
87
78
String content = inputFile .wrappedFile ().contents ();
88
79
JsonFactory factory = new JsonFactory ();
89
80
try (JsonParser jParser = factory .createParser (content )) {
90
- while (!jParser .isClosed ()) {
91
- JsonToken jsonToken = jParser .nextToken ();
92
- if (JsonToken .FIELD_NAME .equals (jsonToken )) {
93
- String fieldName = jParser .currentName ();
94
- if ("cell_type" .equals (fieldName )) {
95
- jParser .nextToken ();
96
- if ("code" .equals (jParser .getValueAsString ())) {
97
- processCodeCell (jParser );
98
- }
99
- }
100
- }
101
- }
102
- // Account for EOF token
103
- addDefaultLocation (lastPythonLine , jParser .currentTokenLocation ());
81
+ return parseCells (jParser ).map (notebookData -> {
82
+ // Account for EOF token
83
+ JsonLocation location = jParser .currentTokenLocation ();
84
+ notebookData .addDefaultLocation (lastPythonLine , location .getLineNr (), location .getColumnNr ());
85
+ return new GeneratedIPythonFile (inputFile .wrappedFile (), notebookData .getAggregatedSource ().toString (), notebookData .getLocationMap ());
86
+ }).orElse (new GeneratedIPythonFile (inputFile .wrappedFile (), "" , new LinkedHashMap <>()));
104
87
}
105
88
106
- return new GeneratedIPythonFile (inputFile .wrappedFile (), aggregatedSource .toString (), locationMap );
107
89
}
108
90
109
- private void processCodeCell (JsonParser jParser ) throws IOException {
91
+ private Optional <NotebookParsingData > parseCells (JsonParser parser ) throws IOException {
92
+ while (!parser .isClosed ()) {
93
+ parser .nextToken ();
94
+ String fieldName = parser .currentName ();
95
+ if ("cells" .equals (fieldName )) {
96
+ // consume array start token
97
+ parser .nextToken ();
98
+ NotebookParsingData data = parseCellArray (parser );
99
+ parser .close ();
100
+ return Optional .of (data );
101
+ }
102
+ }
103
+ return Optional .empty ();
104
+ }
110
105
111
- while (!jParser .isClosed ()) {
112
- JsonToken jsonToken = jParser .nextToken ();
113
- if (JsonToken .FIELD_NAME .equals (jsonToken ) && "source" .equals (jParser .currentName ())) {
114
- jsonToken = jParser .nextToken ();
115
- if (parseSourceArray (jParser , jsonToken ) || parseSourceMultilineString (jParser , jsonToken )) {
116
- break ;
117
- } else {
118
- throw new IllegalStateException ("Unexpected token: " + jsonToken );
119
- }
106
+ private NotebookParsingData parseCellArray (JsonParser jParser ) throws IOException {
107
+ NotebookParsingData aggregatedNotebookData = NotebookParsingData .empty ();
108
+
109
+ while (jParser .nextToken () != JsonToken .END_ARRAY ) {
110
+ if (jParser .currentToken () == JsonToken .START_OBJECT ) {
111
+ processCodeCell (aggregatedNotebookData .getAggregatedSourceLine (), jParser ).ifPresent (aggregatedNotebookData ::combine );
120
112
}
121
113
}
114
+ aggregatedNotebookData .removeTrailingExtraLine ();
115
+ return aggregatedNotebookData ;
122
116
}
123
117
124
- private void appendNewLineAfterPreviousCellDelimiter () {
125
- if (!isFirstCell ) {
126
- aggregatedSource .append ("\n " );
127
- } else {
128
- isFirstCell = false ;
118
+ private static void skipNestedObjects (JsonParser parser ) throws IOException {
119
+ if (parser .currentToken () == JsonToken .START_OBJECT || parser .currentToken () == JsonToken .START_ARRAY ) {
120
+ parser .skipChildren ();
129
121
}
130
122
}
131
123
132
- private boolean parseSourceArray (JsonParser jParser , JsonToken jsonToken ) throws IOException {
133
- if (jsonToken != JsonToken .START_ARRAY ) {
134
- return false ;
124
+ private Optional <NotebookParsingData > processCodeCell (int startLine , JsonParser jParser ) throws IOException {
125
+ boolean isCodeCell = false ;
126
+ Optional <NotebookParsingData > notebookData = Optional .empty ();
127
+ while (jParser .nextToken () != JsonToken .END_OBJECT ) {
128
+
129
+ skipNestedObjects (jParser );
130
+
131
+ if (JsonToken .FIELD_NAME .equals (jParser .currentToken ()) && "cell_type" .equals (jParser .currentName ())) {
132
+ jParser .nextToken ();
133
+ String cellType = jParser .getValueAsString ();
134
+ if ("code" .equals (cellType )) {
135
+ isCodeCell = true ;
136
+ }
137
+ }
138
+ if (JsonToken .FIELD_NAME .equals (jParser .currentToken ()) && "source" .equals (jParser .currentName ())) {
139
+ jParser .nextToken ();
140
+ switch (jParser .currentToken ()) {
141
+ case START_ARRAY :
142
+ notebookData = Optional .of (parseSourceArray (startLine , jParser ));
143
+ break ;
144
+ case VALUE_STRING :
145
+ notebookData = Optional .of (parseSourceMultilineString (startLine , jParser ));
146
+ break ;
147
+ default :
148
+ throw new IllegalStateException ("Unexpected token: " + jParser .currentToken ());
149
+ }
150
+ }
151
+ }
152
+ if (isCodeCell && notebookData .isPresent ()) {
153
+ lastPythonLine = notebookData .get ().getAggregatedSourceLine ();
154
+ return notebookData ;
135
155
}
136
- appendNewLineAfterPreviousCellDelimiter ();
156
+ return Optional .empty ();
157
+ }
158
+
159
+
160
+ private static NotebookParsingData parseSourceArray (int startLine , JsonParser jParser ) throws IOException {
161
+ NotebookParsingData cellData = NotebookParsingData .fromLine (startLine );
137
162
JsonLocation tokenLocation = jParser .currentTokenLocation ();
138
163
// In case of an empty cell, we don't add an extra line
139
164
var lastSourceLine = "\n " ;
140
165
while (jParser .nextToken () != JsonToken .END_ARRAY ) {
141
166
String sourceLine = jParser .getValueAsString ();
142
167
tokenLocation = jParser .currentTokenLocation ();
143
- var countEscapedChar = countEscapeCharacters (sourceLine , new LinkedHashMap <>(), tokenLocation .getColumnNr ());
144
- addLineToSource (sourceLine , tokenLocation , countEscapedChar );
168
+ var countEscapedChar = countEscapeCharacters (sourceLine , tokenLocation .getColumnNr ());
169
+ cellData . addLineToSource (sourceLine , tokenLocation . getLineNr (), tokenLocation . getColumnNr () , countEscapedChar );
145
170
lastSourceLine = sourceLine ;
146
171
}
147
172
if (!lastSourceLine .endsWith ("\n " )) {
148
- aggregatedSource . append ("\n " );
173
+ cellData . appendToSource ("\n " );
149
174
}
150
175
// Account for the last cell delimiter
151
- addDelimiterToSource (tokenLocation );
152
- lastPythonLine = aggregatedSourceLine ;
153
- return true ;
176
+ cellData .addDelimiterToSource (SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n " , tokenLocation .getLineNr (), tokenLocation .getColumnNr ());
177
+ return cellData ;
154
178
}
155
179
156
- private boolean parseSourceMultilineString (JsonParser jParser , JsonToken jsonToken ) throws IOException {
157
- if (jsonToken != JsonToken .VALUE_STRING ) {
158
- return false ;
159
- }
160
- appendNewLineAfterPreviousCellDelimiter ();
180
+ private static NotebookParsingData parseSourceMultilineString (int startLine , JsonParser jParser ) throws IOException {
181
+ NotebookParsingData cellData = NotebookParsingData .fromLine (startLine );
161
182
String sourceLine = jParser .getValueAsString ();
162
183
JsonLocation tokenLocation = jParser .currentTokenLocation ();
163
184
var previousLen = 0 ;
164
185
var previousExtraChars = 0 ;
165
186
166
187
for (String line : sourceLine .lines ().toList ()) {
167
- var countEscapedChar = countEscapeCharacters (line , new LinkedHashMap <>(), previousLen + previousExtraChars + tokenLocation .getColumnNr ());
188
+ var countEscapedChar = countEscapeCharacters (line , previousLen + previousExtraChars + tokenLocation .getColumnNr ());
168
189
var currentCount = countEscapedChar .get (-1 );
169
- addLineToSource (line , new IPythonLocation (tokenLocation .getLineNr (),
190
+ cellData . addLineToSource (line , new IPythonLocation (tokenLocation .getLineNr (),
170
191
tokenLocation .getColumnNr () + previousLen + previousExtraChars , countEscapedChar ));
171
- aggregatedSource . append ("\n " );
192
+ cellData . appendToSource ("\n " );
172
193
previousLen = previousLen + line .length () + 2 ;
173
194
previousExtraChars = previousExtraChars + currentCount ;
174
195
}
175
196
// Account for the last cell delimiter
176
- addDelimiterToSource (tokenLocation );
177
- lastPythonLine = aggregatedSourceLine ;
178
- return true ;
179
- }
180
-
181
- private void addLineToSource (String sourceLine , JsonLocation tokenLocation , Map <Integer , Integer > colOffset ) {
182
- addLineToSource (sourceLine , new IPythonLocation (tokenLocation .getLineNr (), tokenLocation .getColumnNr (), colOffset ));
183
- }
184
-
185
- private void addLineToSource (String sourceLine , IPythonLocation location ) {
186
- aggregatedSource .append (sourceLine );
187
- aggregatedSourceLine ++;
188
- locationMap .put (aggregatedSourceLine , location );
189
- }
190
-
191
- private void addDelimiterToSource (JsonLocation tokenLocation ) {
192
- aggregatedSource .append (SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER );
193
- aggregatedSourceLine ++;
194
- addDefaultLocation (aggregatedSourceLine , tokenLocation );
195
- }
196
-
197
- private void addDefaultLocation (int line , JsonLocation tokenLocation ) {
198
- locationMap .putIfAbsent (line , new IPythonLocation (tokenLocation .getLineNr (), tokenLocation .getColumnNr (), Map .of (-1 , 0 )));
197
+ cellData .addDelimiterToSource (SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n " , tokenLocation .getLineNr (), tokenLocation .getColumnNr ());
198
+ return cellData ;
199
199
}
200
200
201
- private static Map <Integer , Integer > countEscapeCharacters (String sourceLine , Map <Integer , Integer > colMap , int colOffSet ) {
201
+ private static Map <Integer , Integer > countEscapeCharacters (String sourceLine , int colOffSet ) {
202
+ Map <Integer , Integer > colMap = new LinkedHashMap <>();
202
203
int count = 0 ;
203
204
var numberOfExtraChars = 0 ;
204
205
var arr = sourceLine .toCharArray ();
@@ -221,5 +222,4 @@ private static Map<Integer, Integer> countEscapeCharacters(String sourceLine, Ma
221
222
colMap .put (-1 , numberOfExtraChars );
222
223
return colMap ;
223
224
}
224
-
225
225
}
0 commit comments