Skip to content

Commit 1a16cee

Browse files
Merge pull request #2086 from NullVoxPopuli/nvp/fix-component-escaping-in-remark-land
Fix component escaping from markdown
2 parents 383164d + 88fdecf commit 1a16cee

File tree

7 files changed

+253
-24
lines changed

7 files changed

+253
-24
lines changed

packages/repl-sdk/babel.config.cjs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* This babel.config is used for linting only
3+
*/
4+
module.exports = {
5+
plugins: [
6+
[
7+
'module:decorator-transforms',
8+
{
9+
runtime: {
10+
import: require.resolve('decorator-transforms/runtime-esm'),
11+
},
12+
},
13+
],
14+
],
15+
16+
generatorOpts: {
17+
compact: false,
18+
},
19+
};

packages/repl-sdk/package.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@
2222
"type": "addon"
2323
},
2424
"scripts": {
25-
"lint:fix": "pnpm -w exec lint fix",
25+
"lint": "concurrently \"pnpm:lint:*(!fix)\" --names \"lint:\" --prefixColors auto",
26+
"lint:fix": "concurrently \"pnpm:lint:*:fix\" --names \"fix:\" --prefixColors auto && pnpm run format",
2627
"example": "cd example && vite",
2728
"lint:package": "pnpm publint",
2829
"lint:js": "pnpm -w exec lint js",
2930
"lint:types": "tsc --noEmit",
3031
"lint:js:fix": "pnpm -w exec lint js:fix",
31-
"lint:prettier:fix": "pnpm -w exec lint prettier:fix",
32+
"format": "pnpm -w exec lint prettier:fix",
3233
"lint:prettier": "pnpm -w exec lint prettier",
3334
"test:node": "vitest"
3435
},
@@ -56,6 +57,7 @@
5657
"@types/hast": "^3.0.4",
5758
"@types/mdast": "^4.0.4",
5859
"common-tags": "^1.8.2",
60+
"decorator-transforms": "2.3.1",
5961
"eslint": "^9.39.1",
6062
"prettier": "^3.7.4",
6163
"publint": "^0.3.16",

packages/repl-sdk/src/compilers/markdown/parse.js

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* @property {CodeBlock[]} codeBlocks
1313
*/
1414

15+
import remarkEscapeComponents, { REPL_LT } from '../../remark-escape-components.js';
1516
import { buildCompiler } from './build-compiler.js';
1617

1718
export { buildCompiler } from './build-compiler.js';
@@ -22,36 +23,67 @@ export { buildCompiler } from './build-compiler.js';
2223
* @returns {Promise<ParseResult>}
2324
*/
2425
export async function parseMarkdown(input, options) {
  // A caller-supplied compiler is used untouched; otherwise build one with
  // the component-escaping remark plugin appended after the caller's plugins.
  const compiler = options?.compiler ?? buildCompiler(withEscapePlugin(options));

  const file = await compiler.process(input);
  const codeBlocks = /** @type {CodeBlock[]} */ (file.data.liveCode || []);
  // @ts-ignore - processed is typed as unknown due to unified processor complexity
  const rendered = file.toString();

  // 1. Swap the placeholder written by the remark plugin for the `&#x3C;`
  //    entity. The placeholder is plain text, so the pipeline never
  //    entity-encodes it and no double-escaping can occur.
  const withEntities = REPL_LT ? rendered.replaceAll(REPL_LT, '&#x3C;') : rendered;

  // 2. Restore `<` on PascalCase component invocations written in regular
  //    markdown (outside <code>/<pre>) so Glimmer can still invoke them.
  const text = unescapeComponentsOutsideCode(withEntities);

  return { text, codeBlocks };
}

/**
 * Copy `options`, appending `remarkEscapeComponents` to its remark plugins.
 * Only called when no prebuilt compiler was supplied.
 *
 * @param {Parameters<typeof parseMarkdown>[1]} options
 */
function withEscapePlugin(options) {
  const userPlugins = options?.remarkPlugins || [];

  return {
    ...options,
    remarkPlugins: [...userPlugins, remarkEscapeComponents],
  };
}
52+
53+
/**
 * Undo HTML-escaping of PascalCase component tags that appear outside
 * `<code>` and `<pre>` blocks so Glimmer can invoke them.
 *
 * Two shapes are restored:
 *  - `&#x3C;Name ...>` (opening / self-closing tag) — only when the tag text
 *    contains `@`, i.e. it passes at least one named argument;
 *  - `&#x3C;/Name>` (closing tag).
 *
 * Anything inside `<pre>…</pre>` or `<code>…</code>` is returned untouched.
 *
 * @param {string} html rendered HTML
 * @returns {string} the HTML with component tags outside code unescaped
 */
function unescapeComponentsOutsideCode(html) {
  // The `(?=[\s>])` lookahead pins the tag name. Without it the
  // case-insensitive patterns also match the start of component tags such as
  // `<Preview` or `<CodeBlock`, and everything up to the next real
  // `</pre>` / `</code>` would be skipped as if it were code.
  const preBlock = /(<pre(?=[\s>])[\s\S]*?<\/pre>)/gi;
  const codeElement = /(<code(?=[\s>])[^>]*>[\s\S]*?<\/code>)/gi;

  /** @param {string} text markup known to be outside <pre> and <code> */
  const unescapeComponents = (text) =>
    text
      .replace(/&#x3C;([A-Z][a-zA-Z0-9]*\s[^<]*?)>/g, (match, content) =>
        content.includes('@') ? `<${content}>` : match
      )
      .replace(/&#x3C;\/([A-Z][a-zA-Z0-9]*)>/g, '</$1>');

  // `split` with a capturing group keeps the delimiters: even indices are the
  // text between matches, odd indices are the matched <pre>/<code> elements.
  /** @param {string} outsidePre */
  const unescapeOutsideInlineCode = (outsidePre) =>
    outsidePre
      .split(codeElement)
      .map((segment, index) => (index % 2 === 0 ? unescapeComponents(segment) : segment))
      .join('');

  return html
    .split(preBlock)
    .map((part, index) => (index % 2 === 0 ? unescapeOutsideInlineCode(part) : part))
    .join('');
}

packages/repl-sdk/src/compilers/markdown/parse.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ describe('default features', () => {
230230
expect(result).toMatchInlineSnapshot(`
231231
{
232232
"codeBlocks": [],
233-
"text": "<h2 id="hello-foo-two"><code><Hello @foo="two" /></code></h2>",
233+
"text": "<h2 id="hello-foo-two"><code>&#x3C;Hello @foo="two" /></code></h2>",
234234
}
235235
`);
236236
});
packages/repl-sdk/src/remark-escape-components.js

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { visit } from 'unist-util-visit';
2+
3+
/**
4+
* A unique placeholder that replaces `<` in PascalCase component tags
5+
* inside inline code and non-live code fences. It survives the unified
6+
* pipeline without being entity-encoded, and is converted to `&#x3C;`
7+
* in the final post-processing step inside `parseMarkdown()`.
8+
*/
9+
export const REPL_LT = '__REPL_LT__';
10+
11+
/**
 * Remark plugin that masks PascalCase component tags found in code.
 *
 * For every `inlineCode` node, and every `code` (fence) node whose meta does
 * not contain the word `live`, the leading `<` of `<Name ...>` and `</Name>`
 * is replaced with the `REPL_LT` placeholder.
 */
function remarkEscapeComponents() {
  /**
   * Replace the `<` of opening/self-closing and closing component tags.
   *
   * @param {string} source
   */
  const maskComponentTags = (source) =>
    source
      .replace(/<([A-Z][a-zA-Z0-9]*(?:\s[^<]*)?)>/g, `${REPL_LT}$1>`)
      .replace(/<\/([A-Z][a-zA-Z0-9]*)>/g, `${REPL_LT}/$1>`);

  /** @param {import('mdast').Root} tree */
  return (tree) => {
    visit(tree, (node) => {
      switch (node.type) {
        // Inline code (backticks)
        case 'inlineCode':
          node.value = maskComponentTags(node.value);
          break;

        // Code fences (``` blocks) — live fences are left as written
        case 'code':
          if (!/\blive\b/.test(node.meta || '')) {
            node.value = maskComponentTags(node.value);
          }

          break;
      }
    });
  };
}
38+
39+
export default remarkEscapeComponents;

packages/repl-sdk/tests-node/markdown-parse.test.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,47 @@ describe('markdown/parse', () => {
88
expect(result.text).toMatchInlineSnapshot(`"<h1 id="hello">hello</h1>"`);
99
expect(result.codeBlocks).toMatchInlineSnapshot(`[]`);
1010
});
11+
12+
// Regression test for component escaping in markdown.
// The input has a component invocation and a plain tag in inline code, plus a
// live `hbs` fence. Expected result (see the snapshots below):
//  - both inline-code tags keep their `<` entity-escaped inside <code>;
//  - the live fence is replaced by a placeholder <div> and its source is
//    returned verbatim in `codeBlocks`.
test('it escapes component invocations in code blocks', async () => {
13+
const result = await parseMarkdown(
14+
`# Comp in tag
15+
16+
comp: \`<Portal @to="popover">\`
17+
18+
tag: \`<div>\`
19+
20+
## Install
21+
22+
\`\`\`hbs live
23+
<SetupInstructions @src="components/portal-targets.gts" />
24+
\`\`\`
25+
`,
26+
{
27+
isLive: (meta) => meta.includes('live'),
28+
ALLOWED_FORMATS: ['hbs'],
29+
getFlavorFromMeta: () => undefined,
30+
isPreview: () => false,
31+
isBelow: () => false,
32+
}
33+
);
34+
35+
expect(result.text).toMatchInlineSnapshot(`
36+
"<h1 id="comp-in-tag">Comp in tag</h1>
37+
<p>comp: <code>&#x3C;Portal @to="popover"></code></p>
38+
<p>tag: <code>&#x3C;div></code></p>
39+
<h2 id="install">Install</h2>
40+
<div id="repl_1" class="repl-sdk__demo"></div>"
41+
`);
42+
expect(result.codeBlocks).toMatchInlineSnapshot(`
43+
[
44+
{
45+
"code": "<SetupInstructions @src="components/portal-targets.gts" />",
46+
"flavor": undefined,
47+
"format": "hbs",
48+
"meta": "live",
49+
"placeholderId": "repl_1",
50+
},
51+
]
52+
`);
53+
});
1154
});

pnpm-lock.yaml

Lines changed: 95 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)