Skip to content

Commit d031589

Browse files
committed
more flexible handling of titles/yaml in jupyter notebooks
1 parent 192d333 commit d031589

File tree

5 files changed

+139
-34
lines changed

5 files changed

+139
-34
lines changed

src/command/convert/jupyter.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import {
2525
import { partitionCellOptions } from "../../core/lib/partition-cell-options.ts";
2626
import { Metadata } from "../../config/types.ts";
2727
import { jupyterKernelspec } from "../../core/jupyter/kernels.ts";
28+
import { fixupFrontMatter } from "../../core/jupyter/jupyter-fixups.ts";
2829

2930
export async function markdownToJupyterNotebook(
3031
file: string,
@@ -40,7 +41,7 @@ export async function jupyterNotebookToMarkdown(
4041
includeIds: boolean,
4142
) {
4243
// read notebook & alias kernelspec
43-
const notebook = jupyterFromFile(file);
44+
const notebook = fixupFrontMatter(jupyterFromFile(file));
4445
const kernelspec = notebook.metadata.kernelspec;
4546

4647
// generate markdown
@@ -59,7 +60,7 @@ export async function jupyterNotebookToMarkdown(
5960
break;
6061
case "raw":
6162
// see if this is the front matter
62-
if (i === 0) {
63+
if (frontMatter === undefined) {
6364
frontMatter = partitionYamlFrontMatter(cell.source.join(""))?.yaml;
6465
if (!frontMatter) {
6566
md.push(...mdFromRawCell(cell));

src/core/jupyter/jupyter-filters.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
cacheFilteredNotebook,
1818
filteredNotebookFromCache,
1919
} from "./filtered-notebook-cache.ts";
20+
import { fixupFrontMatter } from "./jupyter-fixups.ts";
2021
import { JupyterNotebook } from "./types.ts";
2122

2223
export async function markdownFromNotebookFile(file: string, format?: Format) {
@@ -30,6 +31,9 @@ export async function markdownFromNotebookFile(file: string, format?: Format) {
3031
}
3132

3233
export function markdownFromNotebookJSON(nb: JupyterNotebook) {
34+
// run the front matter fixup
35+
nb = fixupFrontMatter(nb);
36+
3337
const markdown = nb.cells.reduce((md, cell) => {
3438
if (["markdown", "raw"].includes(cell.cell_type)) {
3539
return md + "\n" + cell.source.join("") + "\n";

src/core/jupyter/jupyter-fixups.ts

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@
55
*
66
*/
77

8+
import { stringify } from "encoding/yaml.ts";
89
import { warning } from "log/mod.ts";
910
import {
1011
JupyterNotebook,
1112
JupyterOutput,
1213
JupyterToMarkdownOptions,
1314
} from "./types.ts";
1415

15-
function fixupBokehCells(
16-
nb: JupyterNotebook,
17-
_options: JupyterToMarkdownOptions,
18-
): JupyterNotebook {
16+
import { kTitle } from "../../config/constants.ts";
17+
import { Metadata } from "../../publish/netlify/api/index.ts";
18+
import { lines } from "../lib/text.ts";
19+
import { markdownWithExtractedHeading } from "../pandoc/pandoc-partition.ts";
20+
import { partitionYamlFrontMatter, readYamlFromMarkdown } from "../yaml.ts";
21+
import { JupyterNotebook, JupyterOutput } from "./types.ts";
22+
23+
function fixupBokehCells(nb: JupyterNotebook): JupyterNotebook {
1924
for (const cell of nb.cells) {
2025
if (cell.cell_type === "code") {
2126
let needsFixup = false;
@@ -101,19 +106,95 @@ function fixupBokehCells(
101106
return nb;
102107
}
103108

109+
/**
 * Make sure the notebook's title is carried in YAML front matter.
 *
 * The first raw cell that `partitionYamlFrontMatter` recognises is treated as
 * the front matter cell. If its YAML already has a `title`, the notebook is
 * returned untouched. Otherwise the first heading found in a markdown cell is
 * removed from that cell and used as the title: it is merged into the existing
 * front matter cell, or, when there is none, a new raw cell is inserted at the
 * top of the notebook.
 *
 * The notebook is modified in place and the same object is returned.
 *
 * NOTE(review): `Metadata` is imported from `publish/netlify/api` in this
 * file, whereas the converter imports it from `config/types.ts` — confirm
 * which one is intended.
 *
 * @param nb Notebook to fix up.
 * @returns The (possibly modified) notebook.
 */
export function fixupFrontMatter(nb: JupyterNotebook): JupyterNotebook {
  // helper to generate yaml
  const asYamlText = (yaml: Metadata) => {
    return stringify(yaml, {
      indent: 2,
      lineWidth: -1,
      sortKeys: false,
      skipInvalid: true,
    });
  };

  // helper to create nb lines (w/ newline after)
  const nbLines = (lns: string[]) => {
    return lns.map((line) => `${line}\n`);
  };

  // look for the first raw block that has a yaml object (a plain loop keeps
  // the index and the partitioned result in sync without relying on a side
  // effect inside a predicate callback)
  let partitioned: { yaml: string; markdown: string } | undefined;
  let frontMatterCellIndex = -1;
  for (let i = 0; i < nb.cells.length; i++) {
    const cell = nb.cells[i];
    if (cell.cell_type !== "raw") {
      continue;
    }
    const candidate = partitionYamlFrontMatter(cell.source.join("")) ||
      undefined;
    if (candidate) {
      partitioned = candidate;
      frontMatterCellIndex = i;
      break;
    }
  }

  // if we have front matter and a title then we are done
  const yaml = partitioned ? readYamlFromMarkdown(partitioned.yaml) : undefined;
  if (yaml?.title) {
    return nb;
  }

  // snip the title out of the markdown
  let title: string | undefined;
  for (const cell of nb.cells) {
    if (cell.cell_type === "markdown") {
      // source entries are joined with "" (as everywhere else cell.source is
      // read); joining with "\n" would insert a blank line between every pair
      // of lines and write that back into the cell
      const { lines: remaining, headingText } = markdownWithExtractedHeading(
        cell.source.join(""),
      );
      if (headingText) {
        title = headingText;
        cell.source = nbLines(remaining);
        break;
      }
    }
  }

  // if there is no title then we are done (the doc will have no title)
  if (!title) {
    return nb;
  }

  // if we have yaml then inject the title into the cell
  if (yaml) {
    // new yaml text with title
    yaml[kTitle] = title;
    const yamlText = asYamlText(yaml);

    // re-write cell
    const frontMatterCell = nb.cells[frontMatterCellIndex];
    frontMatterCell.source = nbLines(
      lines(`---\n${yamlText}---\n\n${partitioned?.markdown ?? ""}`),
    );

    // otherwise inject a new cell at the top
  } else {
    // fence the yaml the same way the re-written cell above is fenced, so
    // the new cell is a delimited front matter block rather than bare text
    const yamlText = asYamlText({ title });
    nb.cells.unshift({
      cell_type: "raw",
      source: nbLines(lines(`---\n${yamlText}---\n`)),
      metadata: {},
    });
  }

  // return modified nb
  return nb;
}
185+
104186
const fixups: ((
105187
nb: JupyterNotebook,
106-
options: JupyterToMarkdownOptions,
107188
) => JupyterNotebook)[] = [
108189
fixupBokehCells,
190+
fixupFrontMatter,
109191
];
110192

111193
export function fixupJupyterNotebook(
112194
nb: JupyterNotebook,
113-
options: JupyterToMarkdownOptions,
114195
): JupyterNotebook {
115196
for (const fixup of fixups) {
116-
nb = fixup(nb, options);
197+
nb = fixup(nb);
117198
}
118199
return nb;
119200
}

src/core/jupyter/jupyter.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ export async function jupyterToMarkdown(
656656
options: JupyterToMarkdownOptions,
657657
): Promise<JupyterToMarkdownResult> {
658658
// perform fixups
659-
nb = fixupJupyterNotebook(nb, options);
659+
nb = fixupJupyterNotebook(nb);
660660

661661
// optional content injection / html preservation for html output
662662
// that isn't an ipynb
@@ -705,6 +705,10 @@ export async function jupyterToMarkdown(
705705
}
706706
}
707707

708+
// find the first yaml metadata block and hold it out
709+
// note if it has a title
710+
// at the end, if it doesn't have a title, then snip the title out
711+
708712
// markdown from cell
709713
switch (cell.cell_type) {
710714
case "markdown":

src/core/pandoc/pandoc-partition.ts

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,45 +49,60 @@ export function parsePandocTitle(title: string) {
4949

5050
// partition markdown into yaml, the first heading, and the rest of the markdown text
5151
export function partitionMarkdown(markdown: string): PartitionedMarkdown {
52-
const markdownLines: string[] = [];
53-
let markdownHeading: string | undefined;
54-
let markdownHeadingAttr: PandocAttr | undefined;
55-
let markdownContainsRefs = false;
52+
// partition out yaml
5653
const partitioned = partitionYamlFrontMatter(markdown);
5754
markdown = partitioned ? partitioned.markdown : markdown;
58-
for (const line of lines(markdown)) {
59-
// does this line contains the refs div?
60-
if (!markdownContainsRefs) {
61-
markdownContainsRefs = /^:::\s*{#refs([\s}]|.*?})\s*$/.test(line);
62-
}
6355

64-
if (!markdownHeading) {
56+
// extract heading
57+
const { lines, headingText, headingAttr } = markdownWithExtractedHeading(
58+
markdown,
59+
);
60+
61+
// does this contain refs?
62+
const containsRefs = lines.some((line) =>
63+
/^:::\s*{#refs([\s}]|.*?})\s*$/.test(line)
64+
);
65+
66+
return {
67+
yaml: (partitioned ? readYamlFromMarkdown(partitioned.yaml) : undefined),
68+
headingText,
69+
headingAttr,
70+
containsRefs,
71+
markdown: lines.join("\n"),
72+
srcMarkdownNoYaml: partitioned?.markdown || "",
73+
};
74+
}
75+
76+
export function markdownWithExtractedHeading(markdown: string) {
77+
const mdLines: string[] = [];
78+
let headingText: string | undefined;
79+
let headingAttr: PandocAttr | undefined;
80+
81+
for (const line of lines(markdown)) {
82+
if (!headingText) {
6583
if (line.match(/^\#{1,}\s/)) {
6684
const parsedHeading = parsePandocTitle(line);
67-
markdownHeading = parsedHeading.heading;
68-
markdownHeadingAttr = parsedHeading.attr;
85+
headingText = parsedHeading.heading;
86+
headingAttr = parsedHeading.attr;
6987
} else if (line.match(/^=+\s*$/) || line.match(/^-+\s*$/)) {
70-
const prevLine = markdownLines[markdownLines.length - 1];
88+
const prevLine = mdLines[mdLines.length - 1];
7189
if (prevLine) {
72-
markdownHeading = prevLine;
73-
markdownLines.splice(markdownLines.length - 1);
90+
headingText = prevLine;
91+
mdLines.splice(mdLines.length - 1);
7492
} else {
75-
markdownLines.push(line);
93+
mdLines.push(line);
7694
}
7795
} else {
78-
markdownLines.push(line);
96+
mdLines.push(line);
7997
}
8098
} else {
81-
markdownLines.push(line);
99+
mdLines.push(line);
82100
}
83101
}
84102

85103
return {
86-
yaml: (partitioned ? readYamlFromMarkdown(partitioned.yaml) : undefined),
87-
headingText: markdownHeading,
88-
headingAttr: markdownHeadingAttr,
89-
containsRefs: markdownContainsRefs,
90-
markdown: markdownLines.join("\n"),
91-
srcMarkdownNoYaml: partitioned?.markdown || "",
104+
lines: mdLines,
105+
headingText,
106+
headingAttr,
92107
};
93108
}

0 commit comments

Comments
 (0)