From 80503f872f7485cd27bc3e0f6c684ecf7ffb9daf Mon Sep 17 00:00:00 2001 From: elliot Date: Wed, 6 Aug 2025 10:11:44 -0400 Subject: [PATCH 1/2] fix code block capsule leak by inspecting str tokens --- packages/editor/src/api/pandoc_capsule.ts | 22 ++++++++++ packages/editor/src/nodes/code_block.ts | 50 ++++++++++++++--------- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/packages/editor/src/api/pandoc_capsule.ts b/packages/editor/src/api/pandoc_capsule.ts index 2f4fed03..09437032 100644 --- a/packages/editor/src/api/pandoc_capsule.ts +++ b/packages/editor/src/api/pandoc_capsule.ts @@ -259,6 +259,28 @@ export function blockCapsuleParagraphTokenHandler(type: string) { }; } +export function blockCapsuleStrTokenHandler(type: string) { + const tokenRegex = encodedBlockCapsuleRegex('^', '$'); + return (tok: PandocToken) => { + if (tok.t === PandocTokenType.Str) { + const text = tok.c as string; + const match = text.match(tokenRegex); + if (match) { + const capsuleRecord = parsePandocBlockCapsule(match[0]); + if (capsuleRecord.type === type) { + return match[0]; + } + } + } + return null; + }; +} + +export const blockCapsuleHandlerOr = ( + handler1: (tok: PandocToken) => string | null, + handler2: (tok: PandocToken) => string | null +) => (tok: PandocToken) => handler1(tok) ?? 
handler2(tok); + // create a regex that can be used to match a block capsule export function encodedBlockCapsuleRegex(prefix?: string, suffix?: string, flags?: string) { return new RegExp( diff --git a/packages/editor/src/nodes/code_block.ts b/packages/editor/src/nodes/code_block.ts index b43db140..8a12ca71 100644 --- a/packages/editor/src/nodes/code_block.ts +++ b/packages/editor/src/nodes/code_block.ts @@ -35,7 +35,7 @@ import { hasFencedCodeBlocks } from '../api/pandoc_format'; import { precedingListItemInsertPos, precedingListItemInsert } from '../api/list'; import { EditorOptions } from '../api/options'; import { OmniInsertGroup } from '../api/omni_insert'; -import { blockCapsuleParagraphTokenHandler, blockCapsuleSourceWithoutPrefix, blockCapsuleTextHandler, encodedBlockCapsuleRegex, PandocBlockCapsule, PandocBlockCapsuleFilter } from '../api/pandoc_capsule'; +import { blockCapsuleHandlerOr, blockCapsuleParagraphTokenHandler, blockCapsuleSourceWithoutPrefix, blockCapsuleStrTokenHandler, blockCapsuleTextHandler, encodedBlockCapsuleRegex, PandocBlockCapsule, PandocBlockCapsuleFilter } from '../api/pandoc_capsule'; const kNoAttributesSentinel = 'CEF7FA46'; @@ -70,12 +70,12 @@ const extension = (context: ExtensionContext): Extension => { const fontClass = 'pm-fixedwidth-font'; const attrs = hasAttr ? pandocAttrToDomAttr({ - ...node.attrs, - classes: [...node.attrs.classes, fontClass], - }) + ...node.attrs, + classes: [...node.attrs.classes, fontClass], + }) : { - class: fontClass, - }; + class: fontClass, + }; return ['pre', attrs, ['code', 0]]; }, }, @@ -114,19 +114,19 @@ const extension = (context: ExtensionContext): Extension => { } } } - + output.writeToken(PandocTokenType.CodeBlock, () => { if (hasAttr) { const id = pandocExtensions.fenced_code_attributes ? node.attrs.id : ''; const keyvalue = pandocExtensions.fenced_code_attributes ? 
node.attrs.keyvalue : []; - + // if there are no attributes this will end up outputting a code block // without the fence markers (rather indenting the code block 4 spaces). // we don't want this so we add a sentinel class to the attributes to // force the fence markers (which we then cleanup below in the postprocessor) const classes = [...node.attrs.classes]; if (!pandocAttrAvailable(node.attrs) && pandocExtensions.backtick_code_blocks) { - classes.push(kNoAttributesSentinel) + classes.push(kNoAttributesSentinel); } output.writeAttr(id, classes, keyvalue); @@ -138,11 +138,11 @@ const extension = (context: ExtensionContext): Extension => { }, blockCapsuleFilter: escapedRmdChunkBlockCapsuleFilter(), markdownPostProcessor: (markdown: string) => { - // cleanup the sentinel classes we may have added above + // cleanup the sentinel classes we may have added above if (pandocExtensions.backtick_code_blocks) { markdown = markdown.replace( - new RegExp("``` " + kNoAttributesSentinel, 'g'), - "``` " + " ".repeat(kNoAttributesSentinel.length) + new RegExp("``` " + kNoAttributesSentinel, 'g'), + "``` " + " ".repeat(kNoAttributesSentinel.length) ); } return markdown; @@ -301,9 +301,9 @@ function codeBlockAttrEdit(pandocExtensions: PandocExtensions, pandocCapabilitie tags.push(`#${node.attrs.id}`); } if (node.attrs.classes) { - for (let i=1; i 0) { const lang = node.attrs.classes[0]; if (pandocCapabilities.highlight_languages.includes(lang) || lang === 'tex') { @@ -315,7 +315,7 @@ function codeBlockAttrEdit(pandocExtensions: PandocExtensions, pandocCapabilitie } if (node.attrs.keyvalue && node.attrs.keyvalue.length) { tags.push(`${node.attrs.keyvalue.map( - (kv: [string,string]) => kv[0] + '="' + (kv[1] || '1') + '"').join(' ')} + (kv: [string, string]) => kv[0] + '="' + (kv[1] || '1') + '"').join(' ')} `); } return tags; @@ -364,9 +364,17 @@ export function escapedRmdChunkBlockCapsuleFilter(): PandocBlockCapsuleFilter { encodedBlockCapsuleRegex(undefined, undefined, 'gm'), ), - 
// we are looking for a paragraph token consisting entirely of a block capsule of our type. - // if find that then return the block capsule text - handleToken: blockCapsuleParagraphTokenHandler(kEscapedRmdChunkBlockCapsuleType), + // we are looking for a paragraph token consisting entirely of a block capsule of our type + // OR a string token with a block capsule of our type. if we find that then return the + // block capsule text. + // Historical note: we were previously only using the paragraph handler, but it did not work if the + // code block did not have a blank line between it and the previous paragraph because + // Pandoc would parse the block capsule into the end of that paragraph. + handleToken: + blockCapsuleHandlerOr( + blockCapsuleParagraphTokenHandler(kEscapedRmdChunkBlockCapsuleType), + blockCapsuleStrTokenHandler(kEscapedRmdChunkBlockCapsuleType) + ), // write the node writeNode: (schema: Schema, writer: ProsemirrorWriter, capsule: PandocBlockCapsule) => { @@ -377,8 +385,12 @@ export function escapedRmdChunkBlockCapsuleFilter(): PandocBlockCapsuleFilter { const sourceLines = lines(source); sourceLines[0] = sourceLines[0].replace(/^(```+)\{(\{+[^}]+\}+)\}([ \t]*)$/, "$1$2$3"); - // write the node + const isWritingInsideParagraph = writer.isNodeOpen(schema.nodes.paragraph); + // We can't write code blocks inside of paragraphs, so let's temporarily leave the paragraph + // before reopening it after writing the code block + if (isWritingInsideParagraph) writer.closeNode(); writer.addNode(schema.nodes.code_block, {}, [schema.text(sourceLines.join("\n"))]); + if (isWritingInsideParagraph) writer.openNode(schema.nodes.paragraph, {}); }, }; } From bed8a728e54d152e05c0ed9bfdf02cb749925da2 Mon Sep 17 00:00:00 2001 From: elliot Date: Tue, 12 Aug 2025 14:23:29 -0400 Subject: [PATCH 2/2] add capsule-leak snapshot test --- apps/vscode/src/test/examples/capsule-leak.qmd | 5 +++++ .../generated_snapshots/roundtripped-capsule-leak.qmd | 7 +++++++ 
apps/vscode/src/test/examples/roundtrip-changes.qmd | 7 ------- apps/vscode/src/test/quartoDoc.test.ts | 7 +++++++ 4 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 apps/vscode/src/test/examples/capsule-leak.qmd create mode 100644 apps/vscode/src/test/examples/generated_snapshots/roundtripped-capsule-leak.qmd diff --git a/apps/vscode/src/test/examples/capsule-leak.qmd b/apps/vscode/src/test/examples/capsule-leak.qmd new file mode 100644 index 00000000..a88ce01c --- /dev/null +++ b/apps/vscode/src/test/examples/capsule-leak.qmd @@ -0,0 +1,5 @@ +The lack of newline between this text and the code block previously caused a capsule leak. See https://github.com/quarto-dev/quarto/pull/780 +```{{python}} +1+2 +``` +lets also snapshot what happens to this text after the code block diff --git a/apps/vscode/src/test/examples/generated_snapshots/roundtripped-capsule-leak.qmd b/apps/vscode/src/test/examples/generated_snapshots/roundtripped-capsule-leak.qmd new file mode 100644 index 00000000..c2c7ba27 --- /dev/null +++ b/apps/vscode/src/test/examples/generated_snapshots/roundtripped-capsule-leak.qmd @@ -0,0 +1,7 @@ +The lack of newline between this text and the code block previously caused a capsule leak. 
See https://github.com/quarto-dev/quarto/pull/780 + +```{{python}} +1+2 +``` + +lets also snapshot what happens to this text after the code block \ No newline at end of file diff --git a/apps/vscode/src/test/examples/roundtrip-changes.qmd b/apps/vscode/src/test/examples/roundtrip-changes.qmd index c8438342..ea5236fb 100644 --- a/apps/vscode/src/test/examples/roundtrip-changes.qmd +++ b/apps/vscode/src/test/examples/roundtrip-changes.qmd @@ -4,13 +4,6 @@ hi ``` `````` -kBlockCapsuleSentinel uuid sentinel leak during SE→VE -``````{{python}} -``` -dog -``` -`````` - ````` ```{python} a = 3 diff --git a/apps/vscode/src/test/quartoDoc.test.ts b/apps/vscode/src/test/quartoDoc.test.ts index 7782909b..4b27cbce 100644 --- a/apps/vscode/src/test/quartoDoc.test.ts +++ b/apps/vscode/src/test/quartoDoc.test.ts @@ -57,4 +57,11 @@ suite("Quarto basics", function () { assert.equal(after, await readOrCreateSnapshot("roundtripped-invalid.qmd", after)); }); + test("Roundtripped capsule-leak.qmd matches snapshot", async function () { + const { doc } = await openAndShowTextDocument("capsule-leak.qmd"); + + const { after } = await roundtrip(doc); + + assert.equal(after, await readOrCreateSnapshot("roundtripped-capsule-leak.qmd", after)); + }); });