Skip to content

Commit 8968cc8

Browse files
committed
jupyter - use language from language_info when necessary (#12374)
1 parent 70fb5de commit 8968cc8

File tree

8 files changed

+147
-18
lines changed

8 files changed

+147
-18
lines changed

news/changelog-1.7.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ All changes included in 1.7:
123123
### `jupyter`
124124

125125
- ([#12114](https://github.com/quarto-dev/quarto-cli/issues/12114)): `JUPYTERCACHE` environment variable from [Jupyter cache CLI](https://jupyter-cache.readthedocs.io/en/latest/using/cli.html) is now respected by Quarto when `cache: true` is used. This environment variable lets you change the path of the cache directory.
126+
- ([#12374](https://github.com/quarto-dev/quarto-cli/issues/12374)): Detect the language of Jupyter notebooks that lack the `language` field in their `kernelspec` by falling back to the `language_info.name` field.
126127

127128
## Other Fixes and Improvements
128129

src/command/convert/jupyter.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,23 @@ export async function jupyterNotebookToMarkdown(
4747
) {
4848
// read notebook & alias kernelspec
4949
const notebook = fixupFrontMatter(jupyterFromFile(file));
50-
const kernelspec = notebook.metadata.kernelspec;
50+
let kernelspec = notebook.metadata.kernelspec;
51+
52+
// https://github.com/quarto-dev/quarto-cli/issues/12374
53+
// narrow fix for .ipynbs that have a language_info field but no kernelspec.language
54+
if (
55+
kernelspec.language === undefined && notebook.metadata.language_info?.name
56+
) {
57+
kernelspec = {
58+
...kernelspec,
59+
language: notebook.metadata.language_info?.name,
60+
};
61+
}
62+
if (kernelspec.language === undefined) {
63+
throw new Error(
64+
"No language found in kernelspec for notebook " + file,
65+
);
66+
}
5167

5268
// generate markdown
5369
const md: string[] = [];

src/core/jupyter/types.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ export interface JupyterCapabilitiesEx extends JupyterCapabilities {
7474
venv?: boolean;
7575
}
7676

77+
// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json
78+
// note that this doesn't directly define the kernelspec as used in kernel.json
79+
// but defines the kernelspec object used in .ipynb files
80+
//
81+
// note in addition that Quarto needs to know the language name which
82+
// might not come from the kernelspec itself but will exist in a language_info
83+
// field. When the kernelspec object in a jupyter notebook is missing the language field,
84+
// this object's language field will come from the language_info.name field
85+
//
86+
// see https://github.com/quarto-dev/quarto-cli/issues/12374
7787
export interface JupyterKernelspec {
7888
name: string;
7989
language: string;
@@ -88,10 +98,20 @@ export interface JupyterAssets {
8898
supporting_dir: string;
8999
}
90100

101+
// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json
102+
export type JupyterLanguageInfo = {
103+
name: string;
104+
codemirror_mode?: string | Record<string, unknown>;
105+
file_extension?: string;
106+
mimetype?: string;
107+
pygments_lexer?: string;
108+
};
109+
91110
export interface JupyterNotebook {
92111
metadata: {
93112
kernelspec: JupyterKernelspec;
94113
widgets?: Record<string, unknown>;
114+
language_info?: JupyterLanguageInfo;
95115
[key: string]: unknown;
96116
};
97117
cells: JupyterCell[];

src/execute/jupyter/jupyter-kernel.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ export async function executeKernelOneshot(
5555
}
5656

5757
trace(options, "Executing notebook with oneshot kernel");
58-
const debug = !!options.format.execute[kExecuteDebug];
58+
const debug = !!options.format.execute[kExecuteDebug] ||
59+
(!!Deno.env.get("QUARTO_JUPYTER_DEBUG"));
5960
const result = await execJupyter(
6061
"execute",
6162
{ ...options, debug },

src/execute/jupyter/jupyter.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,19 @@ export const jupyterEngine: ExecutionEngine = {
181181
let nb: JupyterNotebook | undefined;
182182
if (isJupyterNotebook(file)) {
183183
const nbJSON = Deno.readTextFileSync(file);
184-
nb = JSON.parse(nbJSON) as JupyterNotebook;
184+
const nbRaw = JSON.parse(nbJSON);
185+
186+
// https://github.com/quarto-dev/quarto-cli/issues/12374
187+
// kernelspecs are not guaranteed to have a language field
188+
// so we need to check for it and if not present
189+
// use the language_info.name field
190+
if (
191+
nbRaw.metadata.kernelspec.language === undefined &&
192+
nbRaw.metadata.language_info?.name
193+
) {
194+
nbRaw.metadata.kernelspec.language = nbRaw.metadata.language_info.name;
195+
}
196+
nb = nbRaw as JupyterNotebook;
185197
}
186198

187199
// cache check for percent script

src/resources/jupyter/notebook.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def notebook_execute(options, status):
192192
nb_parameterize(nb, quarto_kernel_setup_options["params"])
193193

194194
# insert setup cell
195-
setup_cell = nb_setup_cell(nb.metadata.kernelspec, quarto_kernel_setup_options)
195+
setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options)
196196
nb.cells.insert(0, setup_cell)
197197

198198
nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options)
@@ -254,7 +254,8 @@ def handle_meta_object(obj):
254254
if cell.cell_type == 'code':
255255
total_code_cells += 1
256256
# map cells to their labels
257-
label = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell).get('label', '')
257+
language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name)
258+
label = nb_cell_yaml_options(language, cell).get('label', '')
258259
cell_labels.append(label)
259260
# find max label length
260261
max_label_len = max(max_label_len, len(label))
@@ -350,7 +351,7 @@ def handle_meta_object(obj):
350351
nb_write(client.nb, input)
351352

352353
# execute cleanup cell
353-
cleanup_cell = nb_cleanup_cell(nb.metadata.kernelspec, resource_dir)
354+
cleanup_cell = nb_cleanup_cell(nb, resource_dir)
354355
if cleanup_cell:
355356
kernel_supports_daemonization = True
356357
nb.cells.append(cleanup_cell)
@@ -425,18 +426,20 @@ async def get_info():
425426
def nb_write(nb, input):
426427
nbformat.write(nb, input, version = NB_FORMAT_VERSION)
427428

428-
def nb_setup_cell(kernelspec, options):
429+
def nb_setup_cell(nb, options):
429430
options = dict(options)
430431
options["allow_empty"] = True
431-
return nb_language_cell('setup', kernelspec, **options)
432+
return nb_language_cell('setup', nb, **options)
432433

433-
def nb_cleanup_cell(kernelspec, resource_dir):
434-
return nb_language_cell('cleanup', kernelspec, resource_dir, False)
434+
def nb_cleanup_cell(nb, resource_dir):
435+
return nb_language_cell('cleanup', nb, resource_dir, False)
435436

436-
def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args):
437-
trace(json.dumps(kernelspec, indent=2))
437+
def nb_language_cell(name, nb, resource_dir, allow_empty, **args):
438+
kernelspec = nb.metadata.kernelspec
439+
language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name)
440+
trace(json.dumps(nb.metadata, indent=2))
438441
source = ''
439-
lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', kernelspec.language)
442+
lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', language)
440443
if os.path.isdir(lang_dir):
441444
cell_file = glob.glob(os.path.join(lang_dir, name + '.*'))
442445
# base64-encode the run_path given
@@ -445,7 +448,7 @@ def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args):
445448
with open(cell_file[0], 'r') as file:
446449
source = file.read().format(**args)
447450
else:
448-
trace(f'No {kernelspec.language} directory found in {lang_dir}')
451+
trace(f'No {language} directory found in {lang_dir}')
449452
trace(f'Will look for explicit quarto setup cell information in kernelspec dir')
450453
try:
451454
with open(os.path.join(kernelspec.path, f"quarto_{name}_cell"), 'r') as file:
@@ -500,8 +503,9 @@ def nb_kernel_dependencies(setup_cell):
500503

501504
def cell_execute(client, cell, index, execution_count, eval_default, store_history):
502505

506+
language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name)
503507
# read cell options
504-
cell_options = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell)
508+
cell_options = nb_cell_yaml_options(language, cell)
505509

506510
# check options for eval and error
507511
eval = cell_options.get('eval', eval_default)
@@ -560,7 +564,7 @@ def clear_user_expressions():
560564
del metadata["user_expressions"]
561565

562566
# find expressions in source
563-
language = client.nb.metadata.kernelspec.language
567+
language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name)
564568
source = ''.join(cell.source)
565569
expressions = re.findall(
566570
fr'(?:^|[^`])`{{{language}}}[ \t]([^`]+)`',
@@ -623,7 +627,7 @@ def nb_parameterize(nb, params):
623627

624628
# alias kernel name and language
625629
kernel_name = nb.metadata.kernelspec.name
626-
language = nb.metadata.kernelspec.language
630+
language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name)
627631

628632
# find params index and note any tags/yaml on it (exit if no params)
629633
params_index = find_first_tagged_cell_index(nb, "parameters")
@@ -701,7 +705,7 @@ def find_first_tagged_cell_index(nb, tag):
701705
return parameters_indices[0]
702706

703707
def nb_strip_yaml_options(client, source):
704-
yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.language, source)
708+
yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name), source)
705709
num_yaml_lines = len(yaml_lines)
706710
if num_yaml_lines > 0:
707711
return "\n".join(source.splitlines()[num_yaml_lines:])
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"colab": {
8+
"base_uri": "https://localhost:8080/"
9+
},
10+
"id": "KSSliJZInRNP",
11+
"outputId": "7d826c0a-e6ab-4f76-f334-4cff966e6253"
12+
},
13+
"outputs": [
14+
{
15+
"name": "stdout",
16+
"output_type": "stream",
17+
"text": [
18+
"Hello, world\n"
19+
]
20+
}
21+
],
22+
"source": [
23+
"print(\"Hello, world\")"
24+
]
25+
}
26+
],
27+
"metadata": {
28+
"colab": {
29+
"provenance": []
30+
},
31+
"kernelspec": {
32+
"display_name": "Python 3",
33+
"name": "python3"
34+
},
35+
"language_info": {
36+
"codemirror_mode": {
37+
"name": "ipython",
38+
"version": 3
39+
},
40+
"file_extension": ".py",
41+
"mimetype": "text/x-python",
42+
"name": "python",
43+
"nbconvert_exporter": "python",
44+
"pygments_lexer": "ipython3",
45+
"version": "3.10.15"
46+
}
47+
},
48+
"nbformat": 4,
49+
"nbformat_minor": 0
50+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* issue-12374.test.ts
3+
*
4+
* https://github.com/quarto-dev/quarto-cli/issues/12374
5+
*
6+
* Copyright (C) 2023 Posit Software, PBC
7+
*/
8+
9+
import { quarto } from "../../../src/quarto.ts";
10+
import { test } from "../../test.ts";
11+
import { assertEquals } from "testing/asserts";
12+
import { noErrors } from "../../verify.ts";
13+
14+
test({
15+
name: "jupyter:issue-12374.test.ts",
16+
context: {},
17+
execute: async () => {
18+
// https://github.com/quarto-dev/quarto-cli/issues/12374
19+
await quarto(["render",
20+
"docs/jupyter/issue-12374.ipynb",
21+
"--no-execute-daemon", "--execute"]);
22+
},
23+
verify: [noErrors],
24+
type: "smoke",
25+
});

0 commit comments

Comments
 (0)