Skip to content

Commit 3078c2f

Browse files
authored
Merge pull request #12477 from quarto-dev/bugfix/9089
Jupyter: JSON-encode compound metadata
2 parents 3ffb46f + 5bbcc1a commit 3078c2f

File tree

3 files changed

+91
-4
lines changed

3 files changed

+91
-4
lines changed

news/changelog-1.7.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ All changes included in 1.7:
140140

141141
### `jupyter`
142142

143+
- ([#9089](https://github.com/quarto-dev/quarto-cli/issues/9089)): Compound Jupyter metadata is now JSON-encoded into a special key-value attribute so that it no longer breaks Pandoc's fenced div parsing.
143144
- ([#12114](https://github.com/quarto-dev/quarto-cli/issues/12114)): `JUPYTERCACHE` environment variable from [Jupyter cache CLI](https://jupyter-cache.readthedocs.io/en/latest/using/cli.html) is now respected by Quarto when `cache: true` is used. This environment variable allows changing the path of the cache directory.
144145
- ([#12374](https://github.com/quarto-dev/quarto-cli/issues/12374)): Detect language properly in Jupyter notebooks that lack the `language` field in their `kernelspec`s.
145146
- ([#12228](https://github.com/quarto-dev/quarto-cli/issues/12228)): `quarto render` will now fail if errors are detected at the IPython display level. Setting `error: true` globally or at the cell level will show the error in the output without stopping the rendering.

src/core/jupyter/jupyter.ts

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ import {
174174
jupyterCellSrcAsLines,
175175
jupyterCellSrcAsStr,
176176
} from "./jupyter-shared.ts";
177+
import { error } from "../../deno_ral/log.ts";
178+
import { valid } from "semver/mod.ts";
177179

178180
export const kQuartoMimeType = "quarto_mimetype";
179181
export const kQuartoOutputOrder = "quarto_order";
@@ -921,8 +923,44 @@ export function jupyterCellWithOptions(
921923
}
922924
};
923925

926+
const validMetadata: Record<
927+
string,
928+
string | number | boolean | null | Array<unknown>
929+
> = {};
930+
for (const key of Object.keys(cell.metadata)) {
931+
const value = cell.metadata[key];
932+
let jsonEncodedKeyIndex = 0;
933+
if (value !== undefined) {
934+
if (!value && typeof value === "object") {
935+
validMetadata[key] = null;
936+
} else if (value && typeof value === "object" && !Array.isArray(value)) {
937+
// https://github.com/quarto-dev/quarto-cli/issues/9089
938+
// we need to json-encode this and signal the encoding in the key
939+
// we can't use the key as is since it may contain invalid characters
940+
// and modifying the key might introduce collisions
941+
// we ensure the key is unique with a counter, and assume
942+
// "quarto-private-*" to be a private namespace for quarto.
943+
// we'd prefer to use _quarto-* instead, but Pandoc doesn't allow keys to start
944+
// with an underscore.
945+
validMetadata[
946+
`quarto-private-${++jsonEncodedKeyIndex}`
947+
] = JSON.stringify({ key, value });
948+
} else if (
949+
typeof value === "string" || typeof value === "number" ||
950+
typeof value === "boolean" || Array.isArray(value)
951+
) {
952+
validMetadata[key] = value;
953+
} else {
954+
error(
955+
`Invalid metadata type for key ${key}: ${typeof value}. Entry will not be serialized.`,
956+
);
957+
}
958+
}
959+
}
960+
924961
return {
925962
...cell,
963+
metadata: validMetadata,
926964
id: cellId(cell),
927965
source,
928966
optionsSource,
@@ -1766,7 +1804,10 @@ function isMarkdown(output: JupyterOutput, options: JupyterToMarkdownOptions) {
17661804
return isDisplayDataType(output, options, displayDataIsMarkdown);
17671805
}
17681806

1769-
async function mdOutputStream(output: JupyterOutputStream, options: JupyterToMarkdownOptions) {
1807+
async function mdOutputStream(
1808+
output: JupyterOutputStream,
1809+
options: JupyterToMarkdownOptions,
1810+
) {
17701811
let text: string[] = [];
17711812
if (typeof output.text === "string") {
17721813
text = [output.text];
@@ -1873,8 +1914,11 @@ async function mdOutputDisplayData(
18731914
// if output is invalid, warn and emit empty
18741915
const data = output.data[mimeType] as unknown;
18751916
if (!Array.isArray(data) || data.some((s) => typeof s !== "string")) {
1876-
return await mdWarningOutput(`Unable to process text plain output data
1877-
which does not appear to be plain text: ${JSON.stringify(data)}`, options);
1917+
return await mdWarningOutput(
1918+
`Unable to process text plain output data
1919+
which does not appear to be plain text: ${JSON.stringify(data)}`,
1920+
options,
1921+
);
18781922
}
18791923
const lines = data as string[];
18801924
// pandas inexplicably outputs html tables as text/plain with an enclosing single-quote
@@ -1911,7 +1955,7 @@ which does not appear to be plain text: ${JSON.stringify(data)}`, options);
19111955
// no type match found
19121956
return await mdWarningOutput(
19131957
"Unable to display output for mime type(s): " +
1914-
Object.keys(output.data).join(", "),
1958+
Object.keys(output.data).join(", "),
19151959
options,
19161960
);
19171961
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
---
2+
title: DataBricks Notebooks
3+
engine: jupyter
4+
keep-md: true
5+
execute:
6+
eval: false
7+
_quarto:
8+
tests:
9+
html:
10+
ensureFileRegexMatches:
11+
- []
12+
- [":::"]
13+
---
14+
15+
16+
## Introduction
17+
18+
In this notebook, we try Quarto with DataBricks.
19+
20+
## Chapter
21+
22+
In the first chapter, we try multiple commands and observe their results.
23+
24+
25+
```{python}
26+
#| application/vnd.databricks.v1+cell: {cellMetadata: {byteLimit: 2048000, rowLimit: 10000}, inputWidgets: {}, nuid: 7039bc23-d898-4506-b24d-8f1002a66d18, showTitle: false, title: ''}
27+
df = spark.read.table("samples.nyctaxi.trips")
28+
df.show(5)
29+
```
30+
31+
This is text in-between the commands.
32+
33+
```{python}
34+
#| application/vnd.databricks.v1+cell: {cellMetadata: {byteLimit: 2048000, rowLimit: 10000}, inputWidgets: {}, nuid: 21c1cb83-83cc-40c8-9a8b-f5378d3f29be, showTitle: false, title: ''}
35+
from databricks.sdk.runtime import dbutils
36+
dbutils.fs.ls("dbfs:/Workspace/Users/")
37+
```
38+
39+
## Conclusion
40+
41+
Currently, Quarto does not fully work, at least not rendering.
42+

0 commit comments

Comments
 (0)