Skip to content

Commit e703cce

Browse files
authored
Merge pull request #148 from telesoho/fix-Pipe-in-HTML-table-cells-breaks-Markdown-structure
Fix pipe in html table cells breaks markdown structure
2 parents 13f422a + fc64428 commit e703cce

File tree

5 files changed

+292
-6
lines changed

5 files changed

+292
-6
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Change Log
22

3-
## 1.4.5 (2005/08/29)
3+
## 1.4.6 (2026/02/08)
4+
5+
- fix: Pipe | in HTML table cells breaks Markdown structure #146
6+
- fix: Nested list paste from HTML not working #145
7+
8+
## 1.4.5 (2025/08/29)
49

510
- fix: Paste Image is disabled in SSH, and Dev Container remote modes.
611

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "vscode-markdown-paste-image",
33
"displayName": "Markdown Paste",
44
"description": "A smartly paste for markdown.",
5-
"version": "1.4.5",
5+
"version": "1.4.6",
66
"publisher": "telesoho",
77
"author": {
88
"name": "telesoho",
@@ -49,7 +49,7 @@
4949
"prettier": "npx prettier -w .",
5050
"compile": "npm run lint && tsc -p ./",
5151
"watch": "tsc -watch -p ./",
52-
"pretest": "rimraf ./out_test && npm run prettier && npm run lint && tsc --project ./ts-test.json && cpy './res/**/*' './out_test/res'",
52+
"pretest": "rimraf ./out_test && npm run prettier && npm run lint && tsc --project ./ts-test.json && npx cpy-cli \"res/**/*\" \"out_test/res\"",
5353
"lint": "eslint src --ext ts",
5454
"test": "node ./out_test/test/runTest.js",
5555
"dev:preinstall": "npx vsce package",

src/toMarkdown.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
/**
 * Counts how many list containers (`UL` or `OL`) appear among the ancestors
 * of a list item. The result is used to indent nested list items.
 *
 * @param liNode The list item; only its `parentNode` chain is inspected.
 * @returns 0 when no ancestor is a list, 1 for an item of a top-level list,
 *   2 for an item of a list nested inside another list item, and so on.
 */
function getListDepth(liNode: {
  parentNode: { nodeName: string; parentNode: unknown } | null;
}): number {
  type Ancestor = { nodeName: string; parentNode: unknown };

  let listAncestors = 0;
  // Walk upward until the chain runs out (parentNode is null/undefined).
  for (
    let current: Ancestor | null = liNode.parentNode;
    current;
    current = current.parentNode as Ancestor | null
  ) {
    if (current.nodeName === "UL" || current.nodeName === "OL") {
      listAncestors += 1;
    }
  }
  return listAncestors;
}
15+
116
function genBorder(content, node) {
217
const colspan = parseInt(node.getAttribute("colspan") || "0");
318
let suffix = " " + content + " |";
@@ -14,6 +29,9 @@ function genBorder(content, node) {
1429
}
1530

1631
function cell(content, node) {
32+
// Escape pipe characters in cell content to prevent Markdown parser from treating them as column separators
33+
content = content.replace(/\|/g, "\\|");
34+
1735
const colspan = parseInt(node.getAttribute("colspan") || "0");
1836
let suffix = "|";
1937
if (colspan) {
@@ -127,6 +145,27 @@ function toMarkdown(content, options) {
127145
return content + "\n";
128146
},
129147
},
148+
// Nested list support: add indentation by depth so nested items render correctly (Issue #145)
149+
{
150+
filter: "li",
151+
replacement: function (content, node, opts) {
152+
content = content.replace(/^\n+/, "").replace(/\n+$/, "\n");
153+
const depth = getListDepth(node);
154+
const indent = " "; // 4 spaces per level (CommonMark)
155+
const leadingSpaces = depth > 1 ? indent.repeat(depth - 1) : "";
156+
const continuationIndent = indent.repeat(depth);
157+
content = content.replace(/\n/gm, "\n" + continuationIndent);
158+
let prefix = (opts.bulletListMarker || "*") + " ";
159+
const parent = node.parentNode;
160+
if (parent && parent.nodeName === "OL") {
161+
const start = parent.getAttribute("start");
162+
const index = Array.prototype.indexOf.call(parent.children, node);
163+
prefix = (start ? Number(start) + index : index + 1) + ". ";
164+
}
165+
const lineEnd = node.nextSibling && !/\n$/.test(content) ? "\n" : "";
166+
return leadingSpaces + prefix + content + lineEnd;
167+
},
168+
},
130169
{
131170
filter: ["pre"],
132171
replacement: function (content) {

test/suite/index.ts

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import path from "path";
22
import Mocha from "mocha";
3-
import { glob } from "glob";
3+
import * as fs from "fs";
44

55
export async function run(): Promise<void> {
66
// Create the mocha test
@@ -12,10 +12,27 @@ export async function run(): Promise<void> {
1212
const testsRoot = path.resolve(__dirname, "..");
1313

1414
try {
15-
const files = await glob("**/**.test.js", { cwd: testsRoot });
15+
// Find all test files recursively
16+
/**
 * Recursively collects every file below `dir` whose name ends in `.test.js`.
 *
 * @param dir Directory to scan.
 * @param fileList Accumulator that matches are appended to; it is also the
 *   value returned, so recursive calls share one array.
 * @returns `fileList`, holding each match as a path relative to `testsRoot`.
 */
function findTestFiles(dir: string, fileList: string[] = []): string[] {
  // `withFileTypes` returns Dirent objects, so no extra statSync call is
  // needed per entry. A symlink is not reported as a directory, which keeps
  // the walk from recursing forever on a symlink cycle.
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const filePath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      findTestFiles(filePath, fileList);
    } else if (entry.name.endsWith(".test.js")) {
      fileList.push(path.relative(testsRoot, filePath));
    }
  }
  return fileList;
}
29+
30+
const files = findTestFiles(testsRoot);
1631

1732
// Add files to the test suite
18-
files.forEach((f) => mocha.addFile(path.resolve(testsRoot, f)));
33+
for (const f of files) {
34+
mocha.addFile(path.resolve(testsRoot, f));
35+
}
1936

2037
// Run the mocha test
2138
return new Promise<void>((c, e) => {

test/suite/toMarkdown.test.ts

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
//
2+
// Note: This example test is leveraging the Mocha test framework.
3+
// Please refer to their documentation on https://mochajs.org/ for help.
4+
//
5+
6+
// The module 'assert' provides assertion methods from node
7+
import * as assert from "assert";
8+
import { toMarkdown } from "../../src/toMarkdown";
9+
10+
// Defines a Mocha test suite to group tests of similar kind together
11+
suite("toMarkdown Tests", () => {
12+
// Test that pipe characters in table cells are properly escaped
13+
test("should escape pipe characters in table cell content", () => {
14+
const html = `
15+
<table>
16+
<tr>
17+
<td>Option A | Option B</td>
18+
</tr>
19+
</table>
20+
`;
21+
22+
const result = toMarkdown(html, {
23+
emDelimiter: "*",
24+
});
25+
26+
// The pipe character should be escaped as \|
27+
// Expected: | Option A \| Option B |
28+
assert.ok(
29+
result.includes("Option A \\| Option B"),
30+
`Expected escaped pipe, but got: ${result}`
31+
);
32+
33+
// Should not create extra columns (should be a single cell)
34+
// Count the number of pipes in the row (excluding escaped ones)
35+
const rowMatch = result.match(/\n\|[^\n]+\|/);
36+
assert.ok(rowMatch, "Should contain a table row");
37+
if (rowMatch) {
38+
const row = rowMatch[0];
39+
// Count unescaped pipes by replacing escaped pipes temporarily
40+
const tempRow = row.replace(/\\\|/g, "ESCAPED_PIPE");
41+
const unescapedPipes = (tempRow.match(/\|/g) || []).length;
42+
// Should have exactly 2 pipes (start and end of single cell row)
43+
assert.strictEqual(
44+
unescapedPipes,
45+
2,
46+
`Expected 2 unescaped pipes (start and end), but found ${unescapedPipes} in: ${row}`
47+
);
48+
}
49+
});
50+
51+
test("should handle multiple pipe characters in table cell", () => {
52+
const html = `
53+
<table>
54+
<tr>
55+
<td>a |= y | b</td>
56+
</tr>
57+
</table>
58+
`;
59+
60+
const result = toMarkdown(html, {
61+
emDelimiter: "*",
62+
});
63+
64+
// All pipe characters should be escaped
65+
assert.ok(
66+
result.includes("a \\|= y \\| b"),
67+
`Expected all pipes escaped, but got: ${result}`
68+
);
69+
});
70+
71+
test("should handle table with multiple cells containing pipes", () => {
72+
const html = `
73+
<table>
74+
<tr>
75+
<td>Option A | Option B</td>
76+
<td>Value | Test</td>
77+
</tr>
78+
</table>
79+
`;
80+
81+
const result = toMarkdown(html, {
82+
emDelimiter: "*",
83+
});
84+
85+
// Both cells should have escaped pipes
86+
assert.ok(
87+
result.includes("Option A \\| Option B"),
88+
`First cell should have escaped pipe: ${result}`
89+
);
90+
assert.ok(
91+
result.includes("Value \\| Test"),
92+
`Second cell should have escaped pipe: ${result}`
93+
);
94+
95+
// Should have exactly 3 unescaped pipes per row (start, between cells, end)
96+
const rowMatch = result.match(/\n\|[^\n]+\|/);
97+
assert.ok(rowMatch, "Should contain a table row");
98+
if (rowMatch) {
99+
const row = rowMatch[0];
100+
// Count unescaped pipes by replacing escaped pipes temporarily
101+
const tempRow = row.replace(/\\\|/g, "ESCAPED_PIPE");
102+
const unescapedPipes = (tempRow.match(/\|/g) || []).length;
103+
assert.strictEqual(
104+
unescapedPipes,
105+
3,
106+
`Expected 3 unescaped pipes (start, separator, end), but found ${unescapedPipes} in: ${row}`
107+
);
108+
}
109+
});
110+
111+
test("should not affect tables without pipe characters", () => {
112+
const html = `
113+
<table>
114+
<tr>
115+
<td>Normal Cell Content</td>
116+
<td>Another Cell</td>
117+
</tr>
118+
</table>
119+
`;
120+
121+
const result = toMarkdown(html, {
122+
emDelimiter: "*",
123+
});
124+
125+
// Should still work correctly for normal tables
126+
assert.ok(
127+
result.includes("Normal Cell Content"),
128+
"Should contain normal content"
129+
);
130+
assert.ok(result.includes("Another Cell"), "Should contain second cell");
131+
assert.ok(result.includes("|"), "Should contain table structure");
132+
});
133+
134+
test("should handle table headers with pipe characters", () => {
135+
const html = `
136+
<table>
137+
<thead>
138+
<tr>
139+
<th>Header A | Header B</th>
140+
</tr>
141+
</thead>
142+
<tbody>
143+
<tr>
144+
<td>Content</td>
145+
</tr>
146+
</tbody>
147+
</table>
148+
`;
149+
150+
const result = toMarkdown(html, {
151+
emDelimiter: "*",
152+
});
153+
154+
// Header cell should have escaped pipe
155+
assert.ok(
156+
result.includes("Header A \\| Header B"),
157+
`Header should have escaped pipe: ${result}`
158+
);
159+
});
160+
161+
// Issue #145: Nested list paste from HTML (e.g. Outlook/calendar) should preserve nesting
162+
test("should indent nested ordered list items", () => {
  // Shape taken from the Issue #145 report: an ordered list whose last item
  // contains a nested ordered list.
  const html = `
    <p>The agenda is as follows:</p>
    <ol>
      <li>Next week meeting adjustments (if needed)</li>
      <li>Status on assignments (by TAs)</li>
      <li>Issues found in past week with students (by TAs)</li>
      <li>Next week lecture preparation tasks</li>
      <li>Any Other Business
        <ol>
          <li>Kruskal removal: tried it but cannot due to assignment using it.</li>
        </ol>
      </li>
    </ol>
  `;

  const result = toMarkdown(html, {
    emDelimiter: "*",
  });

  // Top-level items should be "1. ", "2. ", ... "5. "
  assert.ok(
    result.includes("1. Next week meeting"),
    `Expected top-level item 1: ${result}`
  );
  assert.ok(
    result.includes("5. Any Other Business"),
    `Expected top-level item 5: ${result}`
  );

  // Nested item under "Any Other Business" must be indented (4 spaces) so it renders as sub-item
  assert.ok(
    result.includes("    1. Kruskal removal"),
    `Expected nested "1." to be indented with 4 spaces (Issue #145). Got: ${result}`
  );

  // Independent negative check: the nested item must never start a line at
  // column 0. This is asserted on its own so it cannot be satisfied by the
  // positive check above.
  assert.ok(
    !/\n1\. Kruskal removal/.test(result),
    `Nested item should not appear as top-level 1. without leading spaces. Got: ${result}`
  );
});
203+
204+
test("should indent nested unordered list items", () => {
205+
const html = `
206+
<ul>
207+
<li>First
208+
<ul>
209+
<li>Nested bullet</li>
210+
</ul>
211+
</li>
212+
</ul>
213+
`;
214+
215+
const result = toMarkdown(html, {
216+
emDelimiter: "*",
217+
bulletListMarker: "-",
218+
});
219+
220+
assert.ok(
221+
result.includes(" - Nested bullet"),
222+
`Expected nested bullet indented. Got: ${result}`
223+
);
224+
});
225+
});

0 commit comments

Comments
 (0)