Skip to content

Commit 4ff7a87

Browse files
authored
Ensure we always normalize cell source (microsoft#251670)
* Ensure we always normalize cell source * oops
1 parent 4bfe82a commit 4ff7a87

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

extensions/ipynb/src/deserializers.ts

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,16 @@ function sortOutputItemsBasedOnDisplayOrder(outputItems: NotebookCellOutputItem[
9090
.sort((outputItemA, outputItemB) => outputItemA.index - outputItemB.index).map(item => item.item);
9191
}
9292

93-
function concatMultilineString(str: string | string[], trim?: boolean): string {
94-
const nonLineFeedWhiteSpaceTrim = /(^[\t\f\v\r ]+|[\t\f\v\r ]+$)/g;
93+
/**
94+
* Concatenates a multiline string or an array of strings into a single string.
95+
* Also normalizes line endings to use LF (`\n`) instead of CRLF (`\r\n`).
96+
* Same is done in serializer as well.
97+
*/
98+
function concatMultilineCellSource(source: string | string[]): string {
99+
return concatMultilineString(source).replace(/\r\n/g, '\n');
100+
}
101+
102+
function concatMultilineString(str: string | string[]): string {
95103
if (Array.isArray(str)) {
96104
let result = '';
97105
for (let i = 0; i < str.length; i += 1) {
@@ -103,10 +111,9 @@ function concatMultilineString(str: string | string[], trim?: boolean): string {
103111
}
104112
}
105113

106-
// Just trim whitespace. Leave \n in place
107-
return trim ? result.replace(nonLineFeedWhiteSpaceTrim, '') : result;
114+
return result;
108115
}
109-
return trim ? str.toString().replace(nonLineFeedWhiteSpaceTrim, '') : str.toString();
116+
return str.toString();
110117
}
111118

112119
function convertJupyterOutputToBuffer(mime: string, value: unknown): NotebookCellOutputItem {
@@ -289,15 +296,15 @@ export function jupyterCellOutputToCellOutput(output: nbformat.IOutput): Noteboo
289296
}
290297

291298
function createNotebookCellDataFromRawCell(cell: nbformat.IRawCell): NotebookCellData {
292-
const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineString(cell.source), 'raw');
299+
const cellData = new NotebookCellData(NotebookCellKind.Code, concatMultilineCellSource(cell.source), 'raw');
293300
cellData.outputs = [];
294301
cellData.metadata = getNotebookCellMetadata(cell);
295302
return cellData;
296303
}
297304
function createNotebookCellDataFromMarkdownCell(cell: nbformat.IMarkdownCell): NotebookCellData {
298305
const cellData = new NotebookCellData(
299306
NotebookCellKind.Markup,
300-
concatMultilineString(cell.source),
307+
concatMultilineCellSource(cell.source),
301308
'markdown'
302309
);
303310
cellData.outputs = [];
@@ -309,7 +316,7 @@ function createNotebookCellDataFromCodeCell(cell: nbformat.ICodeCell, cellLangua
309316
const outputs = cellOutputs.map(jupyterCellOutputToCellOutput);
310317
const hasExecutionCount = typeof cell.execution_count === 'number' && cell.execution_count > 0;
311318

312-
const source = concatMultilineString(cell.source);
319+
const source = concatMultilineCellSource(cell.source);
313320

314321
const executionSummary: NotebookCellExecutionSummary = hasExecutionCount
315322
? { executionOrder: cell.execution_count as number }

extensions/ipynb/src/serializers.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ function createCodeCellFromNotebookCell(cell: NotebookCellData, preferredLanguag
103103
// & in that case execution summary could contain the data, but metadata will not.
104104
// In such cases we do not want to re-set the metadata with the value from execution summary (remember, user reverted that).
105105
execution_count: cellMetadata.execution_count ?? null,
106-
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
106+
source: splitCellSourceIntoMultilineString(cell.value),
107107
outputs: (cell.outputs || []).map(translateCellDisplayOutput),
108108
metadata: cellMetadata.metadata
109109
};
@@ -117,7 +117,7 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
117117
const cellMetadata = getCellMetadata({ cell });
118118
const rawCell: any = {
119119
cell_type: 'raw',
120-
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
120+
source: splitCellSourceIntoMultilineString(cell.value),
121121
metadata: cellMetadata?.metadata || {} // This cannot be empty.
122122
};
123123
if (cellMetadata?.attachments) {
@@ -129,6 +129,15 @@ function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCel
129129
return rawCell;
130130
}
131131

132+
/**
133+
* Splits the source of a cell into an array of strings, each representing a line.
134+
* Also normalizes line endings to use LF (`\n`) instead of CRLF (`\r\n`).
135+
* Same is done in deserializer as well.
136+
*/
137+
function splitCellSourceIntoMultilineString(source: string): string[] {
138+
return splitMultilineString(source.replace(/\r\n/g, '\n'));
139+
}
140+
132141
function splitMultilineString(source: nbformat.MultilineString): string[] {
133142
if (Array.isArray(source)) {
134143
return source as string[];
@@ -368,7 +377,7 @@ export function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbfo
368377
const cellMetadata = getCellMetadata({ cell });
369378
const markdownCell: any = {
370379
cell_type: 'markdown',
371-
source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
380+
source: splitCellSourceIntoMultilineString(cell.value),
372381
metadata: cellMetadata?.metadata || {} // This cannot be empty.
373382
};
374383
if (cellMetadata?.attachments) {

0 commit comments

Comments
 (0)