Skip to content

Commit c0b09ff

Browse files
author
Kapil Gowru
committed
feat: added scribe to comment what changes it didn't make
1 parent ef8c087 commit c0b09ff

File tree

1 file changed

+316
-4
lines changed

1 file changed

+316
-4
lines changed

.github/scripts/fern-scribe.js

Lines changed: 316 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ class FernScribeGitHub {
126126
this.urlMapper = new FernUrlMapper(process.env.GITHUB_TOKEN, process.env.REPOSITORY);
127127
this.productSlugToDir = parseProductRootMapping();
128128
this.learnToFile = parseLearnToFileMapping();
129+
130+
// Track files that failed MDX validation
131+
this.mdxValidationFailures = [];
129132
}
130133

131134
async init() {
@@ -642,6 +645,17 @@ class FernScribeGitHub {
642645
}
643646

644647
async generateContent(filePath, existingContent, context, fernStructure) {
648+
// Check if content needs chunking
649+
const CHUNK_THRESHOLD = 12000; // Chars threshold to decide when to chunk
650+
if (existingContent.length <= CHUNK_THRESHOLD) {
651+
return this.generateSingleContent(filePath, existingContent, context, fernStructure);
652+
} else {
653+
console.log(` 📊 Large file detected (${existingContent.length} chars) - using chunked processing`);
654+
return this.generateChunkedContent(filePath, existingContent, context, fernStructure);
655+
}
656+
}
657+
658+
async generateSingleContent(filePath, existingContent, context, fernStructure) {
645659
const prompt = `${this.systemPrompt}
646660
647661
## Context
@@ -713,6 +727,159 @@ Complete updated file content:`;
713727
}
714728
}
715729

730+
async generateChunkedContent(filePath, existingContent, context, fernStructure) {
731+
const chunks = this.chunkContent(existingContent, 8000);
732+
const updatedChunks = [];
733+
let hasChanges = false;
734+
735+
console.log(` 🧩 Processing ${chunks.length} chunks for ${filePath}`);
736+
737+
for (let i = 0; i < chunks.length; i++) {
738+
const chunk = chunks[i];
739+
console.log(` 📝 Processing chunk ${i + 1}/${chunks.length}${chunk.section ? ` (${chunk.section})` : ''}`);
740+
741+
const chunkPrompt = `${this.systemPrompt}
742+
743+
## Context
744+
File: ${filePath}
745+
Chunk: ${i + 1} of ${chunks.length}${chunk.section ? ` - Section: "${chunk.section}"` : ''}
746+
Request: ${context.requestDescription}
747+
Existing Instructions: ${context.existingInstructions}
748+
Why Current Approach Doesn't Work: ${context.whyNotWork}
749+
Additional Context: ${context.additionalContext}
750+
${context.slackThreadContent ? `\n## Slack Discussion Context\n${context.slackThreadContent}` : ''}
751+
752+
## Fern Docs Structure Reference
753+
${fernStructure}
754+
755+
## Current Chunk Content
756+
${chunk.content}
757+
758+
## Instructions
759+
${chunk.isComplete ?
760+
'This is the final chunk of the file. Update this section to address the documentation request.' :
761+
`This is chunk ${i + 1} of ${chunks.length} from a larger file. Update only this section as needed to address the documentation request. Do not add or remove section headers unless specifically needed for this chunk.`
762+
}
763+
764+
Focus on:
765+
- Addressing the specific documentation gaps mentioned in the request
766+
- Improving clarity and completeness within this chunk
767+
- Maintaining consistency with Fern documentation patterns
768+
- Preserving the existing structure and flow
769+
770+
CRITICAL MDX SYNTAX REQUIREMENTS:
771+
- ALL opening tags MUST have corresponding closing tags (e.g., <ParamField> must have </ParamField>)
772+
- Self-closing tags must use proper syntax (e.g., <ParamField param="value" />)
773+
- Preserve existing MDX component structure exactly
774+
- When adding new ParamField, CodeBlock, or other components, ensure they are properly closed
775+
- Check that every < has a matching >
776+
- Validate that nested components are properly structured
777+
778+
IMPORTANT: Return ONLY the updated chunk content. Do not include any explanatory text, meta-commentary, or descriptions about what you're doing.
779+
780+
Updated chunk content:`;
781+
782+
try {
783+
const response = await httpRequest('https://api.anthropic.com/v1/messages', {
784+
method: 'POST',
785+
headers: {
786+
'x-api-key': this.anthropicApiKey,
787+
'content-type': 'application/json',
788+
'anthropic-version': '2023-06-01'
789+
},
790+
body: JSON.stringify({
791+
model: 'claude-3-5-sonnet-20241022',
792+
max_tokens: 4096,
793+
messages: [{
794+
role: 'user',
795+
content: chunkPrompt
796+
}]
797+
})
798+
});
799+
800+
if (!response.ok) {
801+
const errorText = await response.text();
802+
console.error(`❌ Anthropic API error for chunk ${i + 1}:`, errorText);
803+
updatedChunks.push(chunk.content); // Use original chunk
804+
continue;
805+
}
806+
807+
const data = await response.json();
808+
const updatedChunkContent = data.content[0]?.text || chunk.content;
809+
810+
// Validate the chunk
811+
const validationResult = this.validateMDXContent(updatedChunkContent);
812+
if (!validationResult.isValid) {
813+
console.warn(`⚠️ MDX validation warnings for chunk ${i + 1}:`, validationResult.warnings);
814+
updatedChunks.push(chunk.content); // Use original chunk if validation fails
815+
} else {
816+
updatedChunks.push(updatedChunkContent);
817+
if (updatedChunkContent !== chunk.content) {
818+
hasChanges = true;
819+
console.log(` ✅ Updated chunk ${i + 1} (${chunk.content.length}${updatedChunkContent.length} chars)`);
820+
} else {
821+
console.log(` ℹ️ No changes for chunk ${i + 1}`);
822+
}
823+
}
824+
825+
} catch (error) {
826+
console.error(`❌ Error processing chunk ${i + 1}:`, error.message);
827+
updatedChunks.push(chunk.content); // Use original chunk
828+
}
829+
830+
// Add a small delay between chunks to be respectful to the API
831+
if (i < chunks.length - 1) {
832+
await new Promise(resolve => setTimeout(resolve, 1000));
833+
}
834+
}
835+
836+
// Reassemble the chunks
837+
const finalContent = this.reassembleChunks(updatedChunks, chunks);
838+
839+
console.log(` 🔧 Reassembled content: ${existingContent.length}${finalContent.length} chars`);
840+
841+
return hasChanges ? finalContent : existingContent;
842+
}
843+
844+
reassembleChunks(updatedChunks, originalChunks) {
845+
// If there's only one chunk, return it directly
846+
if (updatedChunks.length === 1) {
847+
return updatedChunks[0];
848+
}
849+
850+
// For multiple chunks, we need to carefully reassemble
851+
let reassembled = '';
852+
853+
for (let i = 0; i < updatedChunks.length; i++) {
854+
const chunk = updatedChunks[i];
855+
const originalChunk = originalChunks[i];
856+
857+
if (i === 0) {
858+
// First chunk should include frontmatter if present
859+
reassembled = chunk;
860+
} else {
861+
// For subsequent chunks, remove frontmatter if it was duplicated
862+
let cleanChunk = chunk;
863+
if (cleanChunk.startsWith('---\n') && reassembled.includes('---\n')) {
864+
// Remove frontmatter from subsequent chunks
865+
const frontmatterEnd = cleanChunk.indexOf('---\n', 4);
866+
if (frontmatterEnd !== -1) {
867+
cleanChunk = cleanChunk.substring(frontmatterEnd + 4);
868+
}
869+
}
870+
871+
// Add proper spacing between chunks
872+
if (reassembled.trim() && cleanChunk.trim()) {
873+
reassembled += '\n\n' + cleanChunk;
874+
} else {
875+
reassembled += cleanChunk;
876+
}
877+
}
878+
}
879+
880+
return reassembled;
881+
}
882+
716883
// Basic MDX validation to catch common issues
717884
validateMDXContent(content) {
718885
const warnings = [];
@@ -747,6 +914,109 @@ Complete updated file content:`;
747914
};
748915
}
749916

917+
// Intelligent content chunking for large files
918+
chunkContent(content, maxChunkSize = 8000) {
919+
// If content is small enough, return as single chunk
920+
if (content.length <= maxChunkSize) {
921+
return [{ content, isComplete: true, chunkIndex: 0, totalChunks: 1 }];
922+
}
923+
924+
const chunks = [];
925+
const lines = content.split('\n');
926+
let currentChunk = '';
927+
let frontmatter = '';
928+
let inFrontmatter = false;
929+
let frontmatterEnded = false;
930+
931+
// Extract frontmatter first
932+
if (lines[0] === '---') {
933+
inFrontmatter = true;
934+
for (let i = 0; i < lines.length; i++) {
935+
if (i > 0 && lines[i] === '---') {
936+
inFrontmatter = false;
937+
frontmatterEnded = true;
938+
frontmatter = lines.slice(0, i + 1).join('\n') + '\n';
939+
break;
940+
}
941+
}
942+
}
943+
944+
// Start processing from after frontmatter
945+
const startIndex = frontmatterEnded ? lines.findIndex((line, idx) => idx > 0 && line === '---') + 1 : 0;
946+
const contentLines = lines.slice(startIndex);
947+
948+
let sectionBuffer = [];
949+
let currentSection = null;
950+
951+
for (let i = 0; i < contentLines.length; i++) {
952+
const line = contentLines[i];
953+
954+
// Detect section headers (## or ###)
955+
if (line.match(/^#{2,3}\s+/)) {
956+
// If we have accumulated content and adding this section would exceed limit
957+
if (sectionBuffer.length > 0 && (currentChunk + sectionBuffer.join('\n')).length > maxChunkSize) {
958+
// Save current chunk
959+
chunks.push({
960+
content: (chunks.length === 0 ? frontmatter : '') + currentChunk.trim(),
961+
isComplete: false,
962+
chunkIndex: chunks.length,
963+
section: currentSection,
964+
hasMore: true
965+
});
966+
currentChunk = '';
967+
currentSection = null;
968+
}
969+
970+
// Start new section
971+
currentSection = line.replace(/^#+\s+/, '').trim();
972+
sectionBuffer = [line];
973+
} else {
974+
sectionBuffer.push(line);
975+
}
976+
977+
// Check if we need to break at this point
978+
const potentialChunk = currentChunk + sectionBuffer.join('\n') + '\n';
979+
if (potentialChunk.length > maxChunkSize && currentChunk.length > 0) {
980+
// Save current chunk without the current section
981+
chunks.push({
982+
content: (chunks.length === 0 ? frontmatter : '') + currentChunk.trim(),
983+
isComplete: false,
984+
chunkIndex: chunks.length,
985+
section: chunks.length > 0 ? currentSection : null,
986+
hasMore: true
987+
});
988+
currentChunk = sectionBuffer.join('\n') + '\n';
989+
sectionBuffer = [];
990+
} else {
991+
currentChunk += sectionBuffer.join('\n') + '\n';
992+
sectionBuffer = [];
993+
}
994+
}
995+
996+
// Add remaining content as final chunk
997+
if (currentChunk.trim()) {
998+
chunks.push({
999+
content: (chunks.length === 0 ? frontmatter : '') + currentChunk.trim(),
1000+
isComplete: true,
1001+
chunkIndex: chunks.length,
1002+
section: currentSection,
1003+
hasMore: false
1004+
});
1005+
}
1006+
1007+
// Update totalChunks for all chunks
1008+
chunks.forEach(chunk => {
1009+
chunk.totalChunks = chunks.length;
1010+
});
1011+
1012+
console.log(` 📊 Split content into ${chunks.length} chunks (${content.length} chars total)`);
1013+
chunks.forEach((chunk, i) => {
1014+
console.log(` Chunk ${i + 1}: ${chunk.content.length} chars${chunk.section ? ` (${chunk.section})` : ''}`);
1015+
});
1016+
1017+
return chunks;
1018+
}
1019+
7501020
async analyzeDocumentationNeeds(context) {
7511021
if (!this.anthropicApiKey) {
7521022
console.log('⚠️ No Anthropic API key provided - skipping documentation analysis');
@@ -1227,7 +1497,9 @@ Changelog entry:`;
12271497

12281498
async createPullRequest(branchName, context, filesUpdated) {
12291499
const title = `🌿 Fern Scribe: ${context.requestDescription.substring(0, 50)}...`;
1230-
const body = `## 🌿 Fern Scribe Documentation Update
1500+
1501+
// Build the main PR body
1502+
let body = `## 🌿 Fern Scribe Documentation Update
12311503
12321504
**Original Request:** ${context.requestDescription}
12331505
@@ -1238,9 +1510,37 @@ ${filesUpdated.map(file => `- \`${file}\``).join('\n')}
12381510
12391511
${context.slackThread ? `**Related Discussion:** ${context.slackThread}` : ''}
12401512
1241-
${context.additionalContext ? `**Additional Context:** ${context.additionalContext}` : ''}
1513+
${context.additionalContext ? `**Additional Context:** ${context.additionalContext}` : ''}`;
12421514

1243-
---
1515+
// Add section for files that failed MDX validation
1516+
if (this.mdxValidationFailures.length > 0) {
1517+
body += `\n\n## ⚠️ Files with MDX Validation Issues
1518+
1519+
The following files could not be updated due to MDX validation failures after 3 attempts:
1520+
1521+
${this.mdxValidationFailures.map((failure, index) => {
1522+
const warnings = failure.warnings.map(w => ` - ${w}`).join('\n');
1523+
const truncatedContent = failure.suggestedContent && failure.suggestedContent.length > 4000
1524+
? failure.suggestedContent.substring(0, 4000) + '\n\n... [Content truncated due to length]'
1525+
: failure.suggestedContent;
1526+
1527+
return `### ${index + 1}. **\`${failure.filePath}\`** (${failure.title || 'Untitled'})
1528+
1529+
- **URL**: ${failure.url || 'N/A'}
1530+
- **Validation Issues**:
1531+
${warnings}
1532+
1533+
**Suggested Content** (needs manual MDX fixes):
1534+
1535+
\`\`\`mdx
1536+
${truncatedContent || 'No suggested content available'}
1537+
\`\`\``;
1538+
}).join('\n\n')}
1539+
1540+
**Note**: These files require manual review and correction of their MDX component structure before the content can be applied.`;
1541+
}
1542+
1543+
body += `\n\n---
12441544
*This PR was automatically generated by Fern Scribe based on issue #${this.issueNumber}*
12451545
12461546
**Please review the changes carefully before merging.**`;
@@ -1397,8 +1697,20 @@ ${context.additionalContext ? `**Additional Context:** ${context.additionalConte
13971697
}
13981698
}
13991699
if (!valid) {
1400-
const msg = `❌ Skipping file due to invalid MDX after 3 attempts: ${filePath}\nWarnings: ${JSON.stringify(this.validateMDXContent(suggestedContent).warnings)}`;
1700+
const validationResult = this.validateMDXContent(suggestedContent);
1701+
const msg = `❌ Skipping file due to invalid MDX after 3 attempts: ${filePath}\nWarnings: ${JSON.stringify(validationResult.warnings)}`;
14011702
console.warn(msg);
1703+
1704+
// Track this failure for the PR description
1705+
this.mdxValidationFailures.push({
1706+
filePath,
1707+
warnings: validationResult.warnings,
1708+
attempts: 3,
1709+
url: result.url,
1710+
title: result.title,
1711+
suggestedContent: suggestedContent // Store the suggested content despite validation issues
1712+
});
1713+
14021714
// If running in GitHub Actions, comment on the issue
14031715
if (process.env.GITHUB_TOKEN && process.env.REPOSITORY && process.env.ISSUE_NUMBER) {
14041716
const [owner, repo] = process.env.REPOSITORY.split('/');

0 commit comments

Comments
 (0)