Skip to content

Commit d1895a4

Browse files
Remove extra whitespaces
1 parent 0e597aa commit d1895a4

File tree

3 files changed

+111
-2
lines changed

3 files changed

+111
-2
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"lint:code": "eslint .",
4242
"lint:code:fix": "eslint . --fix",
4343
"postinstall": "patch-package",
44-
"postbuild": "node ./scripts/joinLlmsFiles.mjs"
44+
"postbuild": "node ./scripts/joinLlmsFiles.mjs && node ./scripts/cleanupMarkdownFiles.mjs"
4545
},
4646
"devDependencies": {
4747
"@apify/eslint-config": "^1.0.0",

scripts/cleanupMarkdownFiles.mjs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import fs from 'node:fs/promises';
2+
import path from 'node:path';
3+
import { fileURLToPath } from 'node:url';
4+
5+
// ES modules have no built-in __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Absolute path of the `build` directory that sits next to this script's directory.
const BUILD_DIR = path.resolve(__dirname, '..', 'build');
9+
10+
/**
 * Strips the "View as Markdown" / "Copy for LLM" button labels from markdown text
 * and then normalizes the blank lines left behind.
 *
 * @param {string} markdownContent - Markdown text to clean.
 * @returns {string} The cleaned text with leading/trailing whitespace trimmed.
 *   Falsy input (empty string, null, undefined) is returned unchanged.
 */
function removeLlmButtonsFromString(markdownContent) {
    if (!markdownContent) {
        return markdownContent;
    }

    // Rules are applied top to bottom; each one sees the output of the previous one,
    // so the order is part of the behavior.
    const rewriteRules = [
        // Button labels, fused form first, then each label on its own.
        [/View as MarkdownCopy for LLM/g, ''],
        [/View as Markdown/g, ''],
        [/Copy for LLM/g, ''],

        // Blank out any whole line that still contains a label.
        [/^[^\n]*View as Markdown[^\n]*$/gm, ''],
        [/^[^\n]*Copy for LLM[^\n]*$/gm, ''],

        // Whitespace normalization.
        [/\n{3,}/g, '\n\n'], // runs of 3+ newlines become exactly 2
        [/^\s+$/gm, ''], // whitespace-only lines become empty
        [/\n\s*\n\s*\n/g, '\n\n'], // collapse stacked empty lines
    ];

    let text = markdownContent;
    for (const [pattern, replacement] of rewriteRules) {
        text = text.replace(pattern, replacement);
    }

    return text.trim();
}
34+
35+
/**
 * Walks `dir` recursively and cleans every file whose name ends in `.md`.
 *
 * Each markdown file is read as UTF-8, passed through `removeLlmButtonsFromString`,
 * and written back only when the cleaned text differs from what was read.
 * Failures are logged with `console.error` and not rethrown: a failing file does
 * not stop the remaining entries, and a directory that cannot be read is skipped.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<void>}
 */
async function processMarkdownFiles(dir) {
    try {
        const dirents = await fs.readdir(dir, { withFileTypes: true });

        for (const dirent of dirents) {
            const target = path.join(dir, dirent.name);

            if (dirent.isDirectory()) {
                // Descend into the subdirectory before moving to the next sibling.
                await processMarkdownFiles(target);
                continue;
            }

            if (!dirent.name.endsWith('.md')) {
                continue;
            }

            console.log(`Processing: ${target}`);

            try {
                const before = await fs.readFile(target, 'utf8');
                const after = removeLlmButtonsFromString(before);

                // Skip the write when nothing changed.
                if (before !== after) {
                    await fs.writeFile(target, after, 'utf8');
                    console.log(`  ✓ Cleaned: ${target}`);
                }
            } catch (error) {
                console.error(`  ✗ Error processing ${target}:`, error.message);
            }
        }
    } catch (error) {
        console.error(`Error reading directory ${dir}:`, error.message);
    }
}
69+
70+
/**
 * Entry point of the cleanup: checks that `BUILD_DIR` can be stat'ed, then
 * processes every markdown file beneath it.
 *
 * When the stat call fails, an error is logged and the process exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function cleanupMarkdownFiles() {
    console.log('Starting markdown cleanup...');

    // Any stat failure is treated as "build directory not found".
    let buildDirStats;
    try {
        buildDirStats = await fs.stat(BUILD_DIR);
    } catch {
        buildDirStats = false;
    }

    if (!buildDirStats) {
        console.error(`Build directory not found: ${BUILD_DIR}`);
        process.exit(1);
    }

    await processMarkdownFiles(BUILD_DIR);
    console.log('Markdown cleanup completed!');
}
84+
85+
// Run the cleanup only when this file is the script Node was started with,
// not when it is imported for its exports.
//
// The check compares filesystem paths instead of building a `file://` string by
// hand: a hand-built URL never equals `import.meta.url` on Windows (drive letters,
// backslashes), for paths containing characters that get percent-encoded (such as
// spaces), or when `process.argv[1]` is not absolute.
if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
    cleanupMarkdownFiles().catch((error) => {
        console.error(error);
        // Surface unexpected failures to the caller (e.g. the npm `postbuild` step).
        process.exitCode = 1;
    });
}
89+
90+
export { cleanupMarkdownFiles, removeLlmButtonsFromString };

tools/utils/removeLlmButtons.js

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,32 @@ function removeLlmButtons() {
1212
if (node.value && (
1313
node.value.includes('View as Markdown') ||
1414
node.value.includes('Copy for LLM') ||
15-
node.value.includes('View as MarkdownCopy for LLM')
15+
node.value.includes('View as MarkdownCopy for LLM') ||
16+
node.value.trim() === 'View as Markdown' ||
17+
node.value.trim() === 'Copy for LLM'
1618
)) {
1719
// Remove the text node
1820
parent.children.splice(index, 1);
1921
return index; // Adjust index after removal
2022
}
2123
});
24+
25+
// Clean up empty paragraphs that resulted from text removal
26+
visit(tree, 'paragraph', (node, index, parent) => {
27+
// Check if paragraph is empty or only contains whitespace
28+
const hasContent = node.children && node.children.some(child => {
29+
if (child.type === 'text') {
30+
return child.value && child.value.trim().length > 0;
31+
}
32+
return true; // Keep non-text nodes
33+
});
34+
35+
if (!hasContent) {
36+
// Remove empty paragraph
37+
parent.children.splice(index, 1);
38+
return index;
39+
}
40+
});
2241
};
2342
}
2443

0 commit comments

Comments
 (0)