diff --git a/docs/execute-notebooks.md b/docs/execute-notebooks.md index 8885ef458..b11eb5f06 100644 --- a/docs/execute-notebooks.md +++ b/docs/execute-notebooks.md @@ -39,6 +39,43 @@ If you enable execution with the `--execute` flag as above, the following conten In order to execute your MyST content, you must install a Jupyter Server and the kernel needed to execute your code (e.g., the [IPython kernel](https://ipython.readthedocs.io/en/stable/), the [Xeus Python kernel](https://github.com/jupyter-xeus/xeus-python), or the [IRKernel](https://irkernel.github.io/).) ::: +## How to manage the order of execution? + +### Implicit TOC + +If no table of contents (`toc`) is defined in your myst.yml, all executable sources are run in parallel by default. + +### Explicit TOC + +#### Managing concurrency without dependency order + +By default, up to 5 executable files are processed concurrently. + +You can modify this behavior by passing the `--execute-concurrency <n>` option to your build command, where `<n>` specifies how many executable documents should run simultaneously. + +* You can pass `--execute-concurrency <n>` to your build command to change the number of executable documents that will be executed together. + +#### Defining a specific execution order + +To define a sequential execution order, use the `execution_order` field within the `toc` element. For example: + +```yaml + toc: + - file: paper.md + - file: evidence/figure_1.ipynb + execution_order: 0 + - file: evidence/figure_2.ipynb + execution_order: 1 + - file: evidence/figure_3.ipynb + execution_order: 1 +``` + +In this example, `figure_2.ipynb` and `figure_3.ipynb` will both wait for `figure_1.ipynb` to finish before being executed concurrently. Entries without an `execution_order` (here `paper.md`) run alongside the first batch. + +:::{warning} Execution flow +If a notebook that other notebooks depend on fails during execution, the build process will continue by default. 
/**
 * Build the `--execute-concurrency <n>` CLI option.
 *
 * The option takes a required value `<n>`: the maximum number of notebooks
 * that may be executed at the same time. It defaults to 5.
 *
 * The value is parsed with an explicit base-10 parser instead of handing the
 * global `parseInt` to `argParser`. Commander invokes the parser as
 * `(value, previous)`, so a bare `parseInt` would receive the previous/default
 * value (5) as its radix and silently mis-parse the input
 * (e.g. `'10'` -> 5, `'8'` -> NaN).
 *
 * @returns the configured commander `Option`
 * @throws Error (from the parser, at CLI parse time) when the value is not a
 *   positive integer
 */
export function makeExecuteConcurrencyOption() {
  return new Option(
    '--execute-concurrency <n>',
    'Maximum number of notebooks to execute concurrently (default: 5)',
  )
    .argParser((value: string) => {
      const text = String(value).trim();
      // Only plain decimal digits are accepted; this rejects '', '1.5', '-2', '5abc', ...
      const parsed = /^\d+$/.test(text) ? Number.parseInt(text, 10) : NaN;
      if (!Number.isSafeInteger(parsed) || parsed < 1) {
        // Fail early with a clear message rather than passing NaN/0 on to the concurrency limiter
        throw new Error(
          `Invalid value "${value}" for --execute-concurrency: expected a positive integer.`,
        );
      }
      return parsed;
    })
    .default(5);
}
/**
 * Group pages into sequential execution batches based on `execution_order`.
 *
 * - Pages sharing the same `execution_order` are placed in the same batch.
 * - Batches are returned sorted by ascending `execution_order`.
 * - Pages without an `execution_order` declare no dependencies, so they are
 *   placed at the front of the first batch.
 * - When no page declares an `execution_order`, a single batch holding all
 *   pages is returned (for empty input this is `[[]]`).
 *
 * @param pages pages to execute, in their original order
 * @returns batches to run one after another; the pages inside one batch may
 *   be run concurrently
 */
function groupPagesByExecutionOrder<T extends { execution_order?: number }>(pages: T[]): T[][] {
  const unordered: T[] = [];
  const byOrder = new Map<number, T[]>();

  // Single pass: split unordered pages from ordered ones and bucket the latter
  for (const page of pages) {
    const order = page.execution_order;
    if (order === undefined) {
      unordered.push(page);
      continue;
    }
    const bucket = byOrder.get(order);
    if (bucket) {
      bucket.push(page);
    } else {
      byOrder.set(order, [page]);
    }
  }

  // Nothing is ordered: everything runs together as one batch
  if (byOrder.size === 0) {
    return [pages];
  }

  // Sort batches by order value (ascending)
  const batches = Array.from(byOrder.entries())
    .sort(([a], [b]) => a - b)
    .map(([, batch]) => batch);

  // At least one ordered batch exists here, so unordered pages join the first one
  if (unordered.length > 0) {
    batches[0] = [...unordered, ...batches[0]];
  }

  return batches;
}
's' : ''}, max ${concurrency} concurrent)`, + ); + } else if (batch.length > concurrency) { + session.log.info(`🍡 Executing ${batch.length} files (max ${concurrency} concurrent)`); + } + + // Execute files within batch with concurrency control + await Promise.all( + batch.map((page) => + limit(() => + transformMdast(session, { + file: page.file, + projectPath: project.path, + projectSlug: siteProject.slug, + pageSlug: page.slug, + imageExtensions: usedImageExtensions, + watchMode, + execute: true, + extraTransforms, + index: project.index, + offset: page.level ? page.level - 1 : undefined, + }), + ), + ), + ); + + if (batches.length > 1) { + session.log.info(`✅ Batch ${batchIndex + 1} complete`); + } + } + } else { + // Fallback to default behavior + await Promise.all( + pagesToTransform.map((page) => + transformMdast(session, { + file: page.file, + projectPath: project.path, + projectSlug: siteProject.slug, + pageSlug: page.slug, + imageExtensions: usedImageExtensions, + watchMode, + execute: false, + extraTransforms, + index: project.index, + offset: page.level ? 
page.level - 1 : undefined, + }), + ), + ); + } + const pageReferenceStates = selectPageReferenceStates(session, pagesToTransform); // Handle all cross references await Promise.all( diff --git a/packages/myst-cli/src/project/types.ts b/packages/myst-cli/src/project/types.ts index 4cad1fba0..27dbf411a 100644 --- a/packages/myst-cli/src/project/types.ts +++ b/packages/myst-cli/src/project/types.ts @@ -38,6 +38,7 @@ export type LocalProject = { /** The slug that the index get's renamed to for the JSON */ index: string; implicitIndex?: boolean; + execution_order?: number; bibliography: string[]; pages: (LocalProjectPage | LocalProjectFolder | ExternalURL)[]; }; diff --git a/packages/myst-toc/src/toc.ts b/packages/myst-toc/src/toc.ts index 42dc9755b..b9b53d481 100644 --- a/packages/myst-toc/src/toc.ts +++ b/packages/myst-toc/src/toc.ts @@ -20,6 +20,7 @@ import { validateString, validationError, validateBoolean, + validateNumber, } from 'simple-validators'; const COMMON_ENTRY_KEYS = ['title', 'hidden']; @@ -58,7 +59,7 @@ export function validateFileEntry( entry, { required: ['file'], - optional: [...COMMON_ENTRY_KEYS, 'children'], + optional: [...COMMON_ENTRY_KEYS, 'children', 'execution_order'], }, opts, ); @@ -74,6 +75,18 @@ export function validateFileEntry( const commonEntry = validateCommonEntry(intermediate, opts); let output: FileEntry | FileParentEntry = { file, ...commonEntry }; + + if (defined(intermediate.execution_order)) { + const execution_order = validateNumber(intermediate.execution_order, { + ...incrementOptions('execution_order', opts), + integer: true, + min: 0, + }); + if (execution_order !== undefined) { + output.execution_order = execution_order; + } + } + if (defined(entry.children)) { const children = validateList( intermediate.children, diff --git a/packages/myst-toc/src/types.ts b/packages/myst-toc/src/types.ts index 8ef5328cd..35740e3b7 100644 --- a/packages/myst-toc/src/types.ts +++ b/packages/myst-toc/src/types.ts @@ -23,6 +23,7 @@ export 
type ParentEntry = { */ export type FileEntry = { file: string; + execution_order?: number; } & CommonEntry; /**