diff --git a/.github/scripts/fern-scribe.js b/.github/scripts/fern-scribe.js index 670a0fd93..b770b14d5 100644 --- a/.github/scripts/fern-scribe.js +++ b/.github/scripts/fern-scribe.js @@ -8,6 +8,7 @@ class FernScribe { this.turbopufferEndpoint = process.env.TURBOPUFFER_ENDPOINT; this.turbopufferApiKey = process.env.TURBOPUFFER_API_KEY; this.anthropicApiKey = process.env.ANTHROPIC_API_KEY; + this.slackToken = process.env.SLACK_USER_TOKEN; this.owner = process.env.REPOSITORY.split('/')[0]; this.repo = process.env.REPOSITORY.split('/')[1]; @@ -61,33 +62,316 @@ class FernScribe { return parsed; } - async queryTurbopuffer(query) { + parseSlackUrl(url) { + if (!url || !url.includes('slack.com')) return null; + + // Parse Slack URL formats: + // https://workspace.slack.com/archives/C1234567890/p1234567890123456 + // https://workspace.slack.com/archives/C1234567890/p1234567890123456?thread_ts=1234567890.123456 + + const regex = /https:\/\/([^.]+)\.slack\.com\/archives\/([A-Z0-9]+)\/p(\d+)(?:\?thread_ts=(\d+\.\d+))?/; + const match = url.match(regex); + + if (!match) return null; + + const [, workspace, channelId, messageTs, threadTs] = match; + + // Convert message timestamp format (p1234567890123456 -> 1234567890.123456) + const timestamp = messageTs.slice(0, 10) + '.' + messageTs.slice(10); + + return { + workspace, + channelId, + messageTs: timestamp, + threadTs: threadTs || timestamp // Use thread_ts if available, otherwise use message timestamp + }; + } + + async fetchSlackFile(file) { + if (!file.url_private || !this.slackToken) return null; + + try { + // Download the file content + const response = await fetch(file.url_private, { + headers: { + 'Authorization': `Bearer ${this.slackToken}` + } + }); + + if (!response.ok) return null; + + // Handle different file types + const mimeType = file.mimetype || ''; + const fileName = file.name || ''; + const fileExtension = fileName.split('.').pop()?.toLowerCase(); + + // Text-based files - return content directly + if (mimeType.startsWith('text/') || + ['txt', 'md', 'json', 'yaml', 'yml', 'csv', 'log'].includes(fileExtension)) { + return await response.text(); + } + + // Code files - return content with language hint + if (['js', 'ts', 'py', 'java', 'go', 'rs', 'cpp', 'c', 'php', 'rb', 'sh', 'sql', 'html', 'css', 'xml'].includes(fileExtension)) { + const content = await response.text(); + return `// File: ${fileName}\n${content}`; + } + + // Configuration files + if (['env', 'config', 'ini', 'toml', 'properties'].includes(fileExtension) || fileName === 'Dockerfile') { + const content = await response.text(); + return `# Configuration: ${fileName}\n${content}`; + } + + // For binary files, return file info instead of content + return `[Binary file: ${fileName} (${file.size || 0} bytes, ${mimeType})]`; + + } catch (error) { + console.error(`Failed to fetch file ${file.name}:`, error); + return `[Could not fetch file: ${file.name}]`; + } + } + + async describeImage(imageUrl) { + if (!imageUrl || !this.anthropicApiKey) return null; + + try { + // Download image and convert to base64 + const response = await fetch(imageUrl, { + headers: { + 'Authorization': `Bearer ${this.slackToken}` + } + }); + + if (!response.ok) return null; + + const imageBuffer = await response.arrayBuffer(); + const base64Image = Buffer.from(imageBuffer).toString('base64'); + const mimeType = response.headers.get('content-type') || 'image/jpeg'; + + // Use Claude to describe the image + const claudeResponse = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'x-api-key': this.anthropicApiKey, + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01' + }, + body: JSON.stringify({ + model: 'claude-3-5-sonnet-20241022', + max_tokens: 300, + messages: [{ + role: 'user', + content: [ + { + type: 'image', + source: { + type: 'base64', + media_type: mimeType, + data: base64Image + } + }, + { + type: 'text', + text: 'Describe this image in detail, focusing on any text, code, diagrams, or technical content that might be relevant for documentation purposes.' + } + ] + }] + }) + }); + + if (!claudeResponse.ok) return null; + + const data = await claudeResponse.json(); + return data.content[0]?.text || null; + + } catch (error) { + console.error('Failed to describe image:', error); + return null; + } + } + + async fetchSlackThread(slackUrl) { + if (!slackUrl || !this.slackToken) return ''; + + const parsedUrl = this.parseSlackUrl(slackUrl); + if (!parsedUrl) { + console.log('Could not parse Slack URL:', slackUrl); + return ''; + } + + try { + // Fetch the thread replies + const response = await fetch(`https://slack.com/api/conversations.replies?${new URLSearchParams({ + channel: parsedUrl.channelId, + ts: parsedUrl.threadTs, + inclusive: 'true' + })}`, { + method: 'GET', + headers: { + 'Authorization': `Bearer ${this.slackToken}`, + 'Content-Type': 'application/json' + } + }); + + if (!response.ok) { + throw new Error(`Slack API error: ${response.status}`); + } + + const data = await response.json(); + + if (!data.ok) { + console.error('Slack API error:', data.error); + return ''; + } + + // Format the thread messages with files and attachments + const messages = data.messages || []; + const threadContent = await Promise.all(messages.map(async (msg, index) => { + const timestamp = new Date(parseFloat(msg.ts) * 1000).toLocaleString(); + const user = msg.user || 'Unknown'; + let text = msg.text || ''; + + // Preserve code blocks exactly as-is + const codeBlockRegex = /```([^`]*?)```/gs; + const codeBlocks = []; + text = text.replace(codeBlockRegex, (match, code) => { + const placeholder = `__CODE_BLOCK_${codeBlocks.length}__`; + codeBlocks.push(code.trim()); + return placeholder; + }); + + // Clean up other Slack formatting but preserve structure + const cleanText = text + .replace(/<@[UW][A-Z0-9]+(\|[^>]+)?>/g, '@user') // Replace user mentions + .replace(/<#[CD][A-Z0-9]+\|([^>]+)>/g, '#$1') // Replace channel mentions + .replace(/<([^|>]+)\|([^>]+)>/g, '$2') // Replace links with text + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&'); + + // Restore code blocks + let finalText = cleanText; + codeBlocks.forEach((code, i) => { + finalText = finalText.replace(`__CODE_BLOCK_${i}__`, `\`\`\`\n${code}\n\`\`\``); + }); + + let messageContent = `[${timestamp}] ${index === 0 ? '(Original)' : ''} ${user}: ${finalText}`; + + // Handle file attachments - convert all to text + if (msg.files && msg.files.length > 0) { + messageContent += '\n\n**Attached Files:**'; + + for (const file of msg.files) { + messageContent += `\n\n--- File: ${file.name} ---`; + + // Extract text content from file + const fileContent = await this.fetchSlackFile(file); + if (fileContent) { + messageContent += `\n${fileContent}`; + } + + // Describe images using Claude + if (file.mimetype && file.mimetype.startsWith('image/')) { + const imageDescription = await this.describeImage(file.url_private); + if (imageDescription) { + messageContent += `\n[Image Description: ${imageDescription}]`; + } + } + } + } + + // Handle code snippets (Slack's snippet feature) + if (msg.attachments && msg.attachments.length > 0) { + for (const attachment of msg.attachments) { + if (attachment.text) { + messageContent += `\n\n**Code Snippet:**\n\`\`\`\n${attachment.text}\n\`\`\``; + } + } + } + + return messageContent; + })); + + const fullThreadContent = (await Promise.all(threadContent)).join('\n\n---\n\n'); + console.log(`📱 Fetched Slack thread with ${messages.length} messages and extracted file content`); + return fullThreadContent; + + } catch (error) { + console.error('Failed to fetch Slack thread:', error); + return ''; + } + } + + async queryTurbopuffer(query, opts = {}) { + if (!query || query.trimStart().length === 0) { + return []; + } + try { + // Create embedding for the query + const embeddingResponse = await this.createEmbedding(query); + if (!embeddingResponse) { + console.error('Failed to create embedding for query'); + return []; + } + + const requestBody = { + query_embedding: embeddingResponse, + top_k: opts.topK || 10, + namespace: opts.namespace, + ...(opts.documentIdsToIgnore && { document_ids_to_ignore: opts.documentIdsToIgnore }), + ...(opts.urlsToIgnore && { urls_to_ignore: opts.urlsToIgnore }) + }; + const response = await fetch(this.turbopufferEndpoint, { method: 'POST', headers: { 'Authorization': `Bearer ${this.turbopufferApiKey}`, 'Content-Type': 'application/json' }, - body: JSON.stringify({ - query: query, - top_k: 10, - include_metadata: true - }) + body: JSON.stringify(requestBody) }); if (!response.ok) { - throw new Error(`TurboBuffer API error: ${response.status}`); + throw new Error(`Turbopuffer API error: ${response.status}`); } const data = await response.json(); return data.results || []; } catch (error) { - console.error('TurboBuffer query failed:', error); + console.error('Turbopuffer query failed:', error); return []; } } + async createEmbedding(text) { + try { + // Using OpenAI's embedding model + const response = await fetch('https://api.openai.com/v1/embeddings', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + model: 'text-embedding-3-small', + input: text + }) + }); + + if (!response.ok) { + throw new Error(`Embedding API error: ${response.status}`); + } + + const data = await response.json(); + return data.data[0]?.embedding; + } catch (error) { + console.error('Embedding creation failed:', error); + return null; + } + } + async getFernDocsStructure() { try { const response = await fetch('https://buildwithfern.com/learn/llms.txt'); @@ -107,6 +391,7 @@ Request: ${context.requestDescription} Existing Instructions: ${context.existingInstructions} Why Current Approach Doesn't Work: ${context.whyNotWork} Additional Context: ${context.additionalContext} +${context.slackThreadContent ? `\n## Slack Discussion Context\n${context.slackThreadContent}` : ''} ## Fern Docs Structure Reference ${fernStructure} @@ -115,7 +400,7 @@ ${fernStructure} ${existingContent} ## Instructions -Update this file to address the documentation request. Follow Fern documentation best practices and maintain consistency with the existing structure. +Update this file to address the documentation request. Use the Slack discussion context to understand the specific pain points and requirements mentioned by users. Follow Fern documentation best practices and maintain consistency with the existing structure. Provide the complete updated file content:`; @@ -304,15 +589,52 @@ ${context.additionalContext ? `**Additional Context:** ${context.additionalConte const context = this.parseIssueBody(this.issueBody); console.log('📝 Parsed issue context:', context); + // Fetch Slack thread if URL provided + let slackThreadContent = ''; + if (context.slackThread) { + console.log('📱 Fetching Slack thread content...'); + slackThreadContent = await this.fetchSlackThread(context.slackThread); + } + + // Create enhanced query text that includes both request description and Slack context + const enhancedQuery = [ + context.requestDescription, + slackThreadContent ? `\n\nSlack Discussion Context:\n${slackThreadContent}` : '', + context.additionalContext ? `\n\nAdditional Context:\n${context.additionalContext}` : '' + ].filter(Boolean).join('\n'); + // Query TurboBuffer for relevant files console.log('🔍 Querying TurboBuffer for relevant files...'); - const relevantFiles = await this.queryTurbopuffer(context.requestDescription); + const searchResultURLs = new Set(); + const searchResults = []; - if (relevantFiles.length === 0) { + const turbopufferResults = await this.queryTurbopuffer(enhancedQuery, { + namespace: process.env.TURBOPUFFER_NAMESPACE || 'default', + topK: 3 + }); + + // Deduplicate results by URL (following the original logic) + for (const result of turbopufferResults) { + const url = result.attributes?.url || + `https://${result.attributes?.domain}${result.attributes?.pathname}${result.attributes?.hash || ''}`; + + if (result.attributes?.url) { + if (!searchResultURLs.has(result.attributes.url)) { + searchResultURLs.add(result.attributes.url); + searchResults.push(result); + } + } else { + searchResults.push(result); + } + } + + if (searchResults.length === 0) { console.log('❌ No relevant files found'); return; } + console.log(`📁 Found ${searchResults.length} relevant files`); + // Get Fern docs structure const fernStructure = await this.getFernDocsStructure(); @@ -324,14 +646,20 @@ ${context.additionalContext ? `**Additional Context:** ${context.additionalConte const filesUpdated = []; // Process each relevant file - for (const file of relevantFiles) { - const filePath = file.metadata?.path || file.path; + for (const result of searchResults) { + const filePath = result.attributes?.pathname || result.path; if (!filePath) continue; console.log(`📄 Processing: ${filePath}`); const currentContent = await this.getCurrentFileContent(filePath); - const updatedContent = await this.generateContent(filePath, currentContent, context, fernStructure); + const contextWithDocument = { + ...context, + currentDocument: result.attributes?.document || '', + slackThreadContent + }; + + const updatedContent = await this.generateContent(filePath, currentContent, contextWithDocument, fernStructure); if (updatedContent && updatedContent !== currentContent) { await this.updateFile( diff --git a/.github/workflows/fern-scribe.yml b/.github/workflows/fern-scribe.yml index 6fb39546b..a41498cdc 100644 --- a/.github/workflows/fern-scribe.yml +++ b/.github/workflows/fern-scribe.yml @@ -37,7 +37,10 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} TURBOPUFFER_API_KEY: ${{ secrets.TURBOPUFFER_API_KEY }} TURBOPUFFER_ENDPOINT: ${{ secrets.TURBOPUFFER_ENDPOINT }} + TURBOPUFFER_NAMESPACE: ${{ secrets.TURBOPUFFER_NAMESPACE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SLACK_USER_TOKEN: ${{ secrets.SLACK_USER_TOKEN }} ISSUE_NUMBER: ${{ github.event.issue.number }} ISSUE_BODY: ${{ github.event.issue.body }} ISSUE_TITLE: ${{ github.event.issue.title }}