Skip to content

Commit 4865c25

Browse files
committed
refactor: switch to text-based parsing for perfect markdown round-trip compatibility
## CHANGES

- Replace AST parsing with line-by-line text processing
- Preserve original markdown formatting and spacing exactly
- Add text-based methods for explode and assemble operations
- Fix heading level adjustment to maintain structure
- Simplify TOC generation using direct text manipulation
- Remove AST tree dependencies from section extraction
- Bump version to 1.3.1 for compatibility fix
1 parent d58f978 commit 4865c25

File tree

2 files changed

+121
-66
lines changed

2 files changed

+121
-66
lines changed

bin/md-tree.js

Lines changed: 120 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -460,9 +460,59 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
460460
}
461461

462462
async explodeDocument(filePath, outputDir) {
463+
// Use the text-based approach for perfect round-trip compatibility
464+
return await this.explodeDocumentTextBased(filePath, outputDir);
465+
}
466+
467+
// Text-based explode that preserves original formatting exactly
468+
async explodeDocumentTextBased(filePath, outputDir) {
463469
const content = await this.readFile(filePath);
464-
const tree = await this.parser.parse(content);
465-
const sections = this.parser.extractAllSections(tree, 2);
470+
const lines = content.split('\n');
471+
472+
// Find all level 2 headings and their positions
473+
const sections = [];
474+
let currentSection = null;
475+
476+
for (let i = 0; i < lines.length; i++) {
477+
const line = lines[i];
478+
479+
// Check for main title (level 1)
480+
if (line.match(/^# /)) {
481+
if (currentSection) {
482+
currentSection.endLine = i - 1;
483+
sections.push(currentSection);
484+
}
485+
currentSection = null;
486+
continue;
487+
}
488+
489+
// Check for level 2 heading (section start)
490+
if (line.match(/^## /)) {
491+
if (currentSection) {
492+
currentSection.endLine = i - 1;
493+
sections.push(currentSection);
494+
}
495+
496+
currentSection = {
497+
headingText: line.replace(/^## /, ''),
498+
startLine: i,
499+
endLine: null,
500+
lines: [],
501+
};
502+
continue;
503+
}
504+
505+
// Add line to current section if we're in one
506+
if (currentSection) {
507+
currentSection.lines.push(line);
508+
}
509+
}
510+
511+
// Don't forget the last section
512+
if (currentSection) {
513+
currentSection.endLine = lines.length - 1;
514+
sections.push(currentSection);
515+
}
466516

467517
if (sections.length === 0) {
468518
console.log(
@@ -481,14 +531,13 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
481531
// Keep track of section filenames for index generation
482532
const sectionFiles = [];
483533

484-
// Extract each section to its own file (without numbered prefixes)
485-
for (let i = 0; i < sections.length; i++) {
486-
const section = sections[i];
534+
// Extract each section to its own file
535+
for (const section of sections) {
487536
const headingText = section.headingText;
488537

489-
// Decrement heading levels by 1 so the section starts at level 1
490-
const adjustedTree = this.decrementHeadingLevels(section.tree);
491-
const markdown = await this.parser.stringify(adjustedTree);
538+
// Convert the heading to level 1 and preserve all original content
539+
const sectionLines = [`# ${headingText}`, ...section.lines];
540+
const sectionContent = sectionLines.join('\n');
492541

493542
// Generate filename without numbered prefix
494543
const filename = `${this.sanitizeFilename(headingText)}.md`;
@@ -497,15 +546,14 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
497546
sectionFiles.push({
498547
filename,
499548
headingText,
500-
section: section.tree,
501549
});
502550

503-
await this.writeFile(outputPath, markdown);
551+
await this.writeFile(outputPath, sectionContent);
504552
console.log(`✅ ${headingText}${filename}`);
505553
}
506554

507-
// Generate index.md with modified TOC
508-
const indexContent = await this.generateIndexContent(tree, sectionFiles);
555+
// Generate index.md with original title and TOC pointing to files
556+
const indexContent = await this.generateIndexContentTextBased(content, sectionFiles);
509557
const indexPath = path.join(outputDir, 'index.md');
510558
await this.writeFile(indexPath, indexContent);
511559
console.log(`✅ Table of Contents → index.md`);
@@ -516,59 +564,40 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
516564
}
517565

518566
async generateIndexContent(tree, sectionFiles) {
519-
// Get the original TOC but modify it for file links
520-
const headings = this.parser.getHeadingsList(tree);
567+
// Use the text-based approach for consistency
568+
// Convert the tree back to text to get the original content
569+
const originalContent = await this.parser.stringify(tree);
570+
return await this.generateIndexContentTextBased(originalContent, sectionFiles);
571+
}
521572

522-
if (headings.length === 0) {
523-
return '# Table of Contents\n\nNo headings found.';
524-
}
573+
// Generate index content preserving original spacing
574+
async generateIndexContentTextBased(originalContent, sectionFiles) {
575+
const lines = originalContent.split('\n');
525576

526-
// Find the main title (level 1 heading)
527-
const mainTitle = headings.find((h) => h.level === 1);
528-
let toc = mainTitle ? `# ${mainTitle.text}\n\n` : '';
529-
toc += '## Table of Contents\n\n';
577+
// Find the main title
578+
let mainTitle = 'Table of Contents';
579+
for (const line of lines) {
580+
if (line.match(/^# /)) {
581+
mainTitle = line.replace(/^# /, '');
582+
break;
583+
}
584+
}
530585

531586
// Create a map of section names to filenames for quick lookup
532587
const sectionMap = new Map();
533588
sectionFiles.forEach((file) => {
534589
sectionMap.set(file.headingText.toLowerCase(), file.filename);
535590
});
536591

537-
headings.forEach((heading) => {
538-
const indent = ' '.repeat(Math.max(0, heading.level - 1));
539-
const link = heading.text
540-
.toLowerCase()
541-
.replace(/[^a-z0-9\s-]/g, '')
542-
.replace(/\s+/g, '-')
543-
.replace(/-+/g, '-')
544-
.replace(/^-|-$/g, '');
545-
546-
let linkTarget;
547-
548-
if (heading.level === 1) {
549-
// Main title should link to table of contents
550-
linkTarget = '#table-of-contents';
551-
} else if (heading.level === 2) {
552-
// Level 2 headings link to their individual files
553-
const filename = sectionMap.get(heading.text.toLowerCase());
554-
linkTarget = filename ? `./${filename}` : `#${link}`;
555-
} else {
556-
// Sub-headings link to sections within their parent file
557-
// Find the parent level 2 heading
558-
const parentHeading = this.findParentLevel2Heading(headings, heading);
559-
if (parentHeading) {
560-
const parentFilename = sectionMap.get(
561-
parentHeading.text.toLowerCase()
562-
);
563-
linkTarget = parentFilename
564-
? `./${parentFilename}#${link}`
565-
: `#${link}`;
566-
} else {
567-
linkTarget = `#${link}`;
568-
}
569-
}
592+
// Start with title and TOC heading, preserving original spacing
593+
let toc = `# ${mainTitle}\n\n## Table of Contents\n\n`;
594+
595+
// Add the main title link
596+
toc += `- [${mainTitle}](#table-of-contents)\n`;
570597

571-
toc += `${indent}- [${heading.text}](${linkTarget})\n`;
598+
// Add links for each section
599+
sectionFiles.forEach((file) => {
600+
toc += ` - [${file.headingText}](./${file.filename})\n`;
572601
});
573602

574603
return toc;
@@ -645,7 +674,7 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
645674
// Check if index.md exists
646675
try {
647676
await fs.access(indexPath);
648-
} catch (_error) {
677+
} catch {
649678
console.error(`❌ index.md not found in ${inputDir}`);
650679
process.exit(1);
651680
}
@@ -675,7 +704,7 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
675704
console.log(`📖 Found ${sectionFiles.length} sections to assemble`);
676705

677706
// Start building the reassembled document
678-
let assembledContent = `# ${mainTitle.text}\n\n`;
707+
let assembledContent = `# ${mainTitle.text}\n`;
679708

680709
// Process each section file
681710
for (const sectionFile of sectionFiles) {
@@ -684,24 +713,50 @@ For more information, visit: https://github.com/ksylvan/markdown-tree-parser
684713
const filePath = path.join(inputDir, sectionFile.filename);
685714
try {
686715
const sectionContent = await this.readFile(filePath);
687-
const sectionTree = await this.parser.parse(sectionContent);
688716

689-
// Increment heading levels by 1 to restore original structure
690-
const adjustedTree = this.incrementHeadingLevels(sectionTree);
691-
const sectionMarkdown = await this.parser.stringify(adjustedTree);
717+
// Work directly with text to preserve formatting
718+
const adjustedContent =
719+
this.incrementHeadingLevelsInText(sectionContent);
692720

693-
// Remove the leading heading since it will be a level 2 now
694-
assembledContent += sectionMarkdown + '\n\n';
695-
} catch (_error) {
721+
// Add the section content:
722+
// - After main title: blank line then content (original has blank line after title)
723+
// - Between sections: direct concatenation (original has no spacing between sections)
724+
assembledContent += '\n' + adjustedContent;
725+
} catch {
696726
console.error(
697727
`⚠️ Warning: Could not read ${sectionFile.filename}, skipping...`
698728
);
699729
}
700730
}
701731

702732
// Write the assembled document
703-
await this.writeFile(outputFile, assembledContent.trim());
733+
await this.writeFile(outputFile, assembledContent);
704734
console.log(`\n✨ Document assembled to ${outputFile}`);
735+
} // New method to increment heading levels directly in text without AST roundtrip
736+
incrementHeadingLevelsInText(content) {
737+
const lines = content.split('\n');
738+
let isFirstHeading = true;
739+
740+
const adjustedLines = lines.map((line) => {
741+
// Check if line is a heading (starts with #)
742+
const headingMatch = line.match(/^(#{1,6})(\s+.*)$/);
743+
if (headingMatch) {
744+
const [, hashes, rest] = headingMatch;
745+
746+
// Only increment the first heading (the main section heading)
747+
// This converts the level 1 section heading back to level 2
748+
if (isFirstHeading && hashes === '#') {
749+
isFirstHeading = false;
750+
return '##' + rest;
751+
}
752+
753+
// All other headings remain at their current level
754+
return line;
755+
}
756+
return line;
757+
});
758+
759+
return adjustedLines.join('\n');
705760
}
706761

707762
async extractSectionFilesFromTOC(indexTree) {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@kayvan/markdown-tree-parser",
3-
"version": "1.3.0",
3+
"version": "1.3.1",
44
"description": "A powerful JavaScript library and CLI tool for parsing and manipulating markdown files as tree structures using the remark/unified ecosystem",
55
"type": "module",
66
"main": "index.js",

0 commit comments

Comments
 (0)