@@ -126,6 +126,9 @@ class FernScribeGitHub {
126126 this . urlMapper = new FernUrlMapper ( process . env . GITHUB_TOKEN , process . env . REPOSITORY ) ;
127127 this . productSlugToDir = parseProductRootMapping ( ) ;
128128 this . learnToFile = parseLearnToFileMapping ( ) ;
129+
130+ // Track files that failed MDX validation
131+ this . mdxValidationFailures = [ ] ;
129132 }
130133
131134 async init ( ) {
@@ -642,6 +645,17 @@ class FernScribeGitHub {
642645 }
643646
644647 async generateContent ( filePath , existingContent , context , fernStructure ) {
648+ // Check if content needs chunking
649+ const CHUNK_THRESHOLD = 12000 ; // Chars threshold to decide when to chunk
650+ if ( existingContent . length <= CHUNK_THRESHOLD ) {
651+ return this . generateSingleContent ( filePath , existingContent , context , fernStructure ) ;
652+ } else {
653+ console . log ( ` 📊 Large file detected (${ existingContent . length } chars) - using chunked processing` ) ;
654+ return this . generateChunkedContent ( filePath , existingContent , context , fernStructure ) ;
655+ }
656+ }
657+
658+ async generateSingleContent ( filePath , existingContent , context , fernStructure ) {
645659 const prompt = `${ this . systemPrompt }
646660
647661## Context
@@ -713,6 +727,159 @@ Complete updated file content:`;
713727 }
714728 }
715729
  /**
   * Generate updated content for a large file by splitting it into
   * section-aware chunks, sending each chunk to the Anthropic Messages API
   * independently, validating each response as MDX, and reassembling the
   * results into one document.
   *
   * Failure handling is per-chunk and best-effort: an API error, a thrown
   * exception, or a failed MDX validation causes that chunk's ORIGINAL
   * content to be kept, while other chunks may still be updated.
   *
   * @param {string} filePath - Repo-relative path of the file being updated.
   * @param {string} existingContent - Current file content (already known to be large).
   * @param {object} context - Request context (description, instructions, Slack thread, etc.).
   * @param {string} fernStructure - Rendered reference of the Fern docs structure.
   * @returns {Promise<string>} Reassembled content, or `existingContent`
   *   unchanged when no chunk was actually modified.
   */
  async generateChunkedContent(filePath, existingContent, context, fernStructure) {
    // 8000-char chunks leave prompt headroom under the 12000-char
    // threshold that routed us here.
    const chunks = this.chunkContent(existingContent, 8000);
    const updatedChunks = [];
    let hasChanges = false;

    console.log(` 🧩 Processing ${chunks.length} chunks for ${filePath}`);

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      console.log(` 📝 Processing chunk ${i + 1}/${chunks.length}${chunk.section ? ` (${chunk.section})` : ''}`);

      // Per-chunk prompt: same system prompt and request context as the
      // single-file path, but scoped to this chunk's content only.
      const chunkPrompt = `${this.systemPrompt}

## Context
File: ${filePath}
Chunk: ${i + 1} of ${chunks.length}${chunk.section ? ` - Section: "${chunk.section}"` : ''}
Request: ${context.requestDescription}
Existing Instructions: ${context.existingInstructions}
Why Current Approach Doesn't Work: ${context.whyNotWork}
Additional Context: ${context.additionalContext}
${context.slackThreadContent ? `\n## Slack Discussion Context\n${context.slackThreadContent}` : ''}

## Fern Docs Structure Reference
${fernStructure}

## Current Chunk Content
${chunk.content}

## Instructions
${chunk.isComplete ?
  'This is the final chunk of the file. Update this section to address the documentation request.' :
  `This is chunk ${i + 1} of ${chunks.length} from a larger file. Update only this section as needed to address the documentation request. Do not add or remove section headers unless specifically needed for this chunk.`
}

Focus on:
- Addressing the specific documentation gaps mentioned in the request
- Improving clarity and completeness within this chunk
- Maintaining consistency with Fern documentation patterns
- Preserving the existing structure and flow

CRITICAL MDX SYNTAX REQUIREMENTS:
- ALL opening tags MUST have corresponding closing tags (e.g., <ParamField> must have </ParamField>)
- Self-closing tags must use proper syntax (e.g., <ParamField param="value" />)
- Preserve existing MDX component structure exactly
- When adding new ParamField, CodeBlock, or other components, ensure they are properly closed
- Check that every < has a matching >
- Validate that nested components are properly structured

IMPORTANT: Return ONLY the updated chunk content. Do not include any explanatory text, meta-commentary, or descriptions about what you're doing.

Updated chunk content:`;

      try {
        // NOTE(review): httpRequest appears to be a project helper with a
        // fetch-like interface (.ok, .text(), .json()) — confirm its
        // error semantics match fetch.
        const response = await httpRequest('https://api.anthropic.com/v1/messages', {
          method: 'POST',
          headers: {
            'x-api-key': this.anthropicApiKey,
            'content-type': 'application/json',
            'anthropic-version': '2023-06-01'
          },
          body: JSON.stringify({
            model: 'claude-3-5-sonnet-20241022',
            max_tokens: 4096,
            messages: [{
              role: 'user',
              content: chunkPrompt
            }]
          })
        });

        if (!response.ok) {
          const errorText = await response.text();
          console.error(`❌ Anthropic API error for chunk ${i + 1}:`, errorText);
          updatedChunks.push(chunk.content); // Use original chunk
          continue;
        }

        const data = await response.json();
        // Fall back to the original chunk when the API returns no text
        // (or an empty string).
        const updatedChunkContent = data.content[0]?.text || chunk.content;

        // Validate the chunk; an invalid response is discarded in favor
        // of the original chunk rather than propagating broken MDX.
        const validationResult = this.validateMDXContent(updatedChunkContent);
        if (!validationResult.isValid) {
          console.warn(`⚠️ MDX validation warnings for chunk ${i + 1}:`, validationResult.warnings);
          updatedChunks.push(chunk.content); // Use original chunk if validation fails
        } else {
          updatedChunks.push(updatedChunkContent);
          if (updatedChunkContent !== chunk.content) {
            hasChanges = true;
            console.log(` ✅ Updated chunk ${i + 1} (${chunk.content.length} → ${updatedChunkContent.length} chars)`);
          } else {
            console.log(` ℹ️ No changes for chunk ${i + 1}`);
          }
        }

      } catch (error) {
        console.error(`❌ Error processing chunk ${i + 1}:`, error.message);
        updatedChunks.push(chunk.content); // Use original chunk
      }

      // Add a small delay between chunks to be respectful to the API
      // (skipped after the final chunk).
      if (i < chunks.length - 1) {
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }

    // Reassemble the chunks into a single document.
    const finalContent = this.reassembleChunks(updatedChunks, chunks);

    console.log(` 🔧 Reassembled content: ${existingContent.length} → ${finalContent.length} chars`);

    // Return the original content untouched when nothing changed, so the
    // caller can cheaply detect a no-op by identity/equality.
    return hasChanges ? finalContent : existingContent;
  }
843+
844+ reassembleChunks ( updatedChunks , originalChunks ) {
845+ // If there's only one chunk, return it directly
846+ if ( updatedChunks . length === 1 ) {
847+ return updatedChunks [ 0 ] ;
848+ }
849+
850+ // For multiple chunks, we need to carefully reassemble
851+ let reassembled = '' ;
852+
853+ for ( let i = 0 ; i < updatedChunks . length ; i ++ ) {
854+ const chunk = updatedChunks [ i ] ;
855+ const originalChunk = originalChunks [ i ] ;
856+
857+ if ( i === 0 ) {
858+ // First chunk should include frontmatter if present
859+ reassembled = chunk ;
860+ } else {
861+ // For subsequent chunks, remove frontmatter if it was duplicated
862+ let cleanChunk = chunk ;
863+ if ( cleanChunk . startsWith ( '---\n' ) && reassembled . includes ( '---\n' ) ) {
864+ // Remove frontmatter from subsequent chunks
865+ const frontmatterEnd = cleanChunk . indexOf ( '---\n' , 4 ) ;
866+ if ( frontmatterEnd !== - 1 ) {
867+ cleanChunk = cleanChunk . substring ( frontmatterEnd + 4 ) ;
868+ }
869+ }
870+
871+ // Add proper spacing between chunks
872+ if ( reassembled . trim ( ) && cleanChunk . trim ( ) ) {
873+ reassembled += '\n\n' + cleanChunk ;
874+ } else {
875+ reassembled += cleanChunk ;
876+ }
877+ }
878+ }
879+
880+ return reassembled ;
881+ }
882+
716883 // Basic MDX validation to catch common issues
717884 validateMDXContent ( content ) {
718885 const warnings = [ ] ;
@@ -747,6 +914,109 @@ Complete updated file content:`;
747914 } ;
748915 }
749916
917+ // Intelligent content chunking for large files
918+ chunkContent ( content , maxChunkSize = 8000 ) {
919+ // If content is small enough, return as single chunk
920+ if ( content . length <= maxChunkSize ) {
921+ return [ { content, isComplete : true , chunkIndex : 0 , totalChunks : 1 } ] ;
922+ }
923+
924+ const chunks = [ ] ;
925+ const lines = content . split ( '\n' ) ;
926+ let currentChunk = '' ;
927+ let frontmatter = '' ;
928+ let inFrontmatter = false ;
929+ let frontmatterEnded = false ;
930+
931+ // Extract frontmatter first
932+ if ( lines [ 0 ] === '---' ) {
933+ inFrontmatter = true ;
934+ for ( let i = 0 ; i < lines . length ; i ++ ) {
935+ if ( i > 0 && lines [ i ] === '---' ) {
936+ inFrontmatter = false ;
937+ frontmatterEnded = true ;
938+ frontmatter = lines . slice ( 0 , i + 1 ) . join ( '\n' ) + '\n' ;
939+ break ;
940+ }
941+ }
942+ }
943+
944+ // Start processing from after frontmatter
945+ const startIndex = frontmatterEnded ? lines . findIndex ( ( line , idx ) => idx > 0 && line === '---' ) + 1 : 0 ;
946+ const contentLines = lines . slice ( startIndex ) ;
947+
948+ let sectionBuffer = [ ] ;
949+ let currentSection = null ;
950+
951+ for ( let i = 0 ; i < contentLines . length ; i ++ ) {
952+ const line = contentLines [ i ] ;
953+
954+ // Detect section headers (## or ###)
955+ if ( line . match ( / ^ # { 2 , 3 } \s + / ) ) {
956+ // If we have accumulated content and adding this section would exceed limit
957+ if ( sectionBuffer . length > 0 && ( currentChunk + sectionBuffer . join ( '\n' ) ) . length > maxChunkSize ) {
958+ // Save current chunk
959+ chunks . push ( {
960+ content : ( chunks . length === 0 ? frontmatter : '' ) + currentChunk . trim ( ) ,
961+ isComplete : false ,
962+ chunkIndex : chunks . length ,
963+ section : currentSection ,
964+ hasMore : true
965+ } ) ;
966+ currentChunk = '' ;
967+ currentSection = null ;
968+ }
969+
970+ // Start new section
971+ currentSection = line . replace ( / ^ # + \s + / , '' ) . trim ( ) ;
972+ sectionBuffer = [ line ] ;
973+ } else {
974+ sectionBuffer . push ( line ) ;
975+ }
976+
977+ // Check if we need to break at this point
978+ const potentialChunk = currentChunk + sectionBuffer . join ( '\n' ) + '\n' ;
979+ if ( potentialChunk . length > maxChunkSize && currentChunk . length > 0 ) {
980+ // Save current chunk without the current section
981+ chunks . push ( {
982+ content : ( chunks . length === 0 ? frontmatter : '' ) + currentChunk . trim ( ) ,
983+ isComplete : false ,
984+ chunkIndex : chunks . length ,
985+ section : chunks . length > 0 ? currentSection : null ,
986+ hasMore : true
987+ } ) ;
988+ currentChunk = sectionBuffer . join ( '\n' ) + '\n' ;
989+ sectionBuffer = [ ] ;
990+ } else {
991+ currentChunk += sectionBuffer . join ( '\n' ) + '\n' ;
992+ sectionBuffer = [ ] ;
993+ }
994+ }
995+
996+ // Add remaining content as final chunk
997+ if ( currentChunk . trim ( ) ) {
998+ chunks . push ( {
999+ content : ( chunks . length === 0 ? frontmatter : '' ) + currentChunk . trim ( ) ,
1000+ isComplete : true ,
1001+ chunkIndex : chunks . length ,
1002+ section : currentSection ,
1003+ hasMore : false
1004+ } ) ;
1005+ }
1006+
1007+ // Update totalChunks for all chunks
1008+ chunks . forEach ( chunk => {
1009+ chunk . totalChunks = chunks . length ;
1010+ } ) ;
1011+
1012+ console . log ( ` 📊 Split content into ${ chunks . length } chunks (${ content . length } chars total)` ) ;
1013+ chunks . forEach ( ( chunk , i ) => {
1014+ console . log ( ` Chunk ${ i + 1 } : ${ chunk . content . length } chars${ chunk . section ? ` (${ chunk . section } )` : '' } ` ) ;
1015+ } ) ;
1016+
1017+ return chunks ;
1018+ }
1019+
7501020 async analyzeDocumentationNeeds ( context ) {
7511021 if ( ! this . anthropicApiKey ) {
7521022 console . log ( '⚠️ No Anthropic API key provided - skipping documentation analysis' ) ;
@@ -1227,7 +1497,9 @@ Changelog entry:`;
12271497
12281498 async createPullRequest ( branchName , context , filesUpdated ) {
12291499 const title = `🌿 Fern Scribe: ${ context . requestDescription . substring ( 0 , 50 ) } ...` ;
1230- const body = `## 🌿 Fern Scribe Documentation Update
1500+
1501+ // Build the main PR body
1502+ let body = `## 🌿 Fern Scribe Documentation Update
12311503
12321504**Original Request:** ${ context . requestDescription }
12331505
@@ -1238,9 +1510,37 @@ ${filesUpdated.map(file => `- \`${file}\``).join('\n')}
12381510
12391511${ context . slackThread ? `**Related Discussion:** ${ context . slackThread } ` : '' }
12401512
1241- ${ context . additionalContext ? `**Additional Context:** ${ context . additionalContext } ` : '' }
1513+ ${ context . additionalContext ? `**Additional Context:** ${ context . additionalContext } ` : '' } ` ;
12421514
1243- ---
1515+ // Add section for files that failed MDX validation
1516+ if ( this . mdxValidationFailures . length > 0 ) {
1517+ body += `\n\n## ⚠️ Files with MDX Validation Issues
1518+
1519+ The following files could not be updated due to MDX validation failures after 3 attempts:
1520+
1521+ ${ this . mdxValidationFailures . map ( ( failure , index ) => {
1522+ const warnings = failure . warnings . map ( w => ` - ${ w } ` ) . join ( '\n' ) ;
1523+ const truncatedContent = failure . suggestedContent && failure . suggestedContent . length > 4000
1524+ ? failure . suggestedContent . substring ( 0 , 4000 ) + '\n\n... [Content truncated due to length]'
1525+ : failure . suggestedContent ;
1526+
1527+ return `### ${ index + 1 } . **\`${ failure . filePath } \`** (${ failure . title || 'Untitled' } )
1528+
1529+ - **URL**: ${ failure . url || 'N/A' }
1530+ - **Validation Issues**:
1531+ ${ warnings }
1532+
1533+ **Suggested Content** (needs manual MDX fixes):
1534+
1535+ \`\`\`mdx
1536+ ${ truncatedContent || 'No suggested content available' }
1537+ \`\`\`` ;
1538+ } ) . join ( '\n\n' ) }
1539+
1540+ **Note**: These files require manual review and correction of their MDX component structure before the content can be applied.` ;
1541+ }
1542+
1543+ body += `\n\n---
12441544*This PR was automatically generated by Fern Scribe based on issue #${ this . issueNumber } *
12451545
12461546**Please review the changes carefully before merging.**` ;
@@ -1397,8 +1697,20 @@ ${context.additionalContext ? `**Additional Context:** ${context.additionalConte
13971697 }
13981698 }
13991699 if ( ! valid ) {
1400- const msg = `❌ Skipping file due to invalid MDX after 3 attempts: ${ filePath } \nWarnings: ${ JSON . stringify ( this . validateMDXContent ( suggestedContent ) . warnings ) } ` ;
1700+ const validationResult = this . validateMDXContent ( suggestedContent ) ;
1701+ const msg = `❌ Skipping file due to invalid MDX after 3 attempts: ${ filePath } \nWarnings: ${ JSON . stringify ( validationResult . warnings ) } ` ;
14011702 console . warn ( msg ) ;
1703+
1704+ // Track this failure for the PR description
1705+ this . mdxValidationFailures . push ( {
1706+ filePath,
1707+ warnings : validationResult . warnings ,
1708+ attempts : 3 ,
1709+ url : result . url ,
1710+ title : result . title ,
1711+ suggestedContent : suggestedContent // Store the suggested content despite validation issues
1712+ } ) ;
1713+
14021714 // If running in GitHub Actions, comment on the issue
14031715 if ( process . env . GITHUB_TOKEN && process . env . REPOSITORY && process . env . ISSUE_NUMBER ) {
14041716 const [ owner , repo ] = process . env . REPOSITORY . split ( '/' ) ;
0 commit comments