Enable documentation generation and add repo content endpoints

DhruvK278 · DhruvK278 · commit 2bf5fe200f1f · 2025-09-03T00:02:08.000+05:30
Uncommented the documentation generation tool and integrated it into the GitHub toolkit. Added a new `/repo_content` endpoint to the MCP server, with matching client and service methods, to fetch repository content, including non-recursive retrieval of key files from the repo root. Made the MCP client timeout configurable. Improved error handling and logging throughout. Enhanced the Weaviate connection logic to retry on startup (up to 5 attempts, 5 seconds apart). Updated dependencies to include httpx.
diff --git a/backend/app/agents/devrel/github/github_toolkit.py b/backend/app/agents/devrel/github/github_toolkit.py
@@ -10,7 +10,7 @@
 from .tools.contributor_recommendation import handle_contributor_recommendation
 # from .tools.repository_query import handle_repo_query
 # from .tools.issue_creation import handle_issue_creation
-# from .tools.documentation_generation import handle_documentation_generation
+from .tools.documentation_generation import handle_documentation_generation
 from .tools.general_github_help import handle_general_github_help
 logger = logging.getLogger(__name__)
 
@@ -124,13 +124,11 @@ async def execute(self, query: str) -> Dict[str, Any]:
                 result = await handle_contributor_recommendation(query)
             elif classification == "repo_support":
                 result = await handle_repo_query(query)
-                # result = await handle_repo_query(query)
             elif classification == "issue_creation":
                 result = "Not implemented"
                 # result = await handle_issue_creation(query)
             elif classification == "documentation_generation":
-                result = "Not implemented"
-                # result = await handle_documentation_generation(query)
+                result = await handle_documentation_generation(query)
             elif classification == "web_search":
                 result = await handle_web_search(query)
             else:
diff --git a/backend/app/agents/devrel/github/services/github_mcp_client.py b/backend/app/agents/devrel/github/services/github_mcp_client.py
@@ -6,17 +6,21 @@
 logger = logging.getLogger(__name__)
 
 class GitHubMCPClient:
+    """Client for communicating with the GitHub MCP server."""
 
-    #Client for communicating with the GitHub MCP server.
-    
-    def __init__(self, mcp_server_url: str = "http://localhost:8001"):
-
+    def __init__(self, mcp_server_url: str = "http://localhost:8001", timeout: int = 15):
+        """
+        Initializes the client.
+        Args:
+            mcp_server_url: The URL of the MCP server.
+            timeout: The total timeout in seconds for client requests.
+        """
         self.mcp_server_url = mcp_server_url
+        self.timeout = timeout  # Store the timeout value
         self.session: Optional[aiohttp.ClientSession] = None
     
     async def __aenter__(self):
-        # Async context manager entry
-        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15))
+        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout))
         return self
     
     async def __aexit__(self, exc_type, exc_val, exc_tb):
@@ -68,4 +72,20 @@ async def is_server_available(self) -> bool:
             
         except (aiohttp.ClientError, asyncio.TimeoutError) as e:
             logger.debug(f"Health check failed: {e}")
-            return False
+            return False
+        
+    async def get_repo_content(self, owner: str, repo: str) -> Dict[str, Any]:
+        """Fetches repository content via the MCP server."""
+        if not self.session:
+            raise RuntimeError("Client not initialized. Use async context manager.")
+        
+        payload = {"owner": owner, "repo": repo}
+        try:
+            async with self.session.post(
+                f"{self.mcp_server_url}/repo_content", json=payload
+            ) as response:
+                response.raise_for_status()
+                return await response.json()
+        except aiohttp.ClientError as e:
+            logger.error(f"Error communicating with MCP server for repo content: {e}")
+            return {"error": f"Communication error: {str(e)}"}
diff --git a/backend/app/agents/devrel/github/services/github_mcp_server.py b/backend/app/agents/devrel/github/services/github_mcp_server.py
@@ -94,3 +94,21 @@ async def get_repo_info(request: RepoInfoRequest):
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8001)
+
+@app.post("/repo_content")
+async def get_repo_content(request: RepoInfoRequest):
+    """Endpoint to get the content of a repository."""
+    try:
+        if not github_service:
+            raise HTTPException(status_code=500, detail="GitHub service not available")
+        
+        result = github_service.get_repo_content(request.owner, request.repo)
+        
+        if "error" in result:
+            return {"status": "error", "error": result["error"]}
+        
+        return result
+        
+    except Exception as e:
+        logger.error(f"Error getting repo content: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/backend/app/agents/devrel/github/services/github_mcp_service.py b/backend/app/agents/devrel/github/services/github_mcp_service.py
@@ -61,4 +61,66 @@ def repo_query(self, owner: str, repo: str) -> dict:
             "created_at": data.get("created_at"),
             "updated_at": data.get("updated_at"),
             "pushed_at": data.get("pushed_at"),
-        }
+        }
+    
+    def get_repo_content(self, owner: str, repo: str) -> dict:
+        """
+        Fetches the content of key files from the repository's root directory.
+        This non-recursive approach is much faster and avoids timeouts.
+        """
+        print(f"SERVICE: Getting repo content for {owner}/{repo}")
+
+        contents_url = f"{self.base_url}/repos/{owner}/{repo}/contents/"
+        headers = {
+            "Authorization": f"Bearer {self.token}",
+            "Accept": "application/vnd.github+json",
+        }
+
+        try:
+            contents_resp = requests.get(contents_url, headers=headers, timeout=15)
+            contents_resp.raise_for_status()
+            contents_data = contents_resp.json()
+        except requests.exceptions.RequestException as e:
+            print(f"SERVICE ERROR: Failed to fetch repository contents: {e}")
+            return {"error": "Failed to fetch repository contents", "message": str(e)}
+
+        files_to_download = []
+        key_files = [
+            "readme.md", "pyproject.toml", "requirements.txt", "package.json", 
+            "dockerfile", "main.py", "app.py", "index.js"
+        ]
+        for item in contents_data:
+            if item.get("type") == "file" and item.get("name").lower() in key_files:
+                files_to_download.append(item)
+        
+        if not files_to_download:
+            print("SERVICE INFO: No key files found in root. Grabbing first 5 files.")
+            files_to_download = [item for item in contents_data if item.get("type") == "file"][:5]
+
+        if not files_to_download:
+            print("SERVICE ERROR: No files found in the root directory.")
+            return {"error": "No files found in the root directory to generate documentation from."}
+
+        print(f"SERVICE INFO: Found {len(files_to_download)} files to download.")
+        
+        files = []
+        for item in files_to_download:
+            try:
+                download_url = item.get("download_url")
+                if not download_url:
+                    continue
+
+                file_resp = requests.get(download_url, headers=headers, timeout=15)
+                file_resp.raise_for_status()
+                
+                content = file_resp.text
+                files.append({"name": item["name"], "content": content})
+                print(f"SERVICE INFO: Successfully downloaded {item['name']}")
+
+            except requests.exceptions.RequestException as e:
+                print(f"SERVICE WARNING: Skipping file {item['name']}: {e}")
+
+        if not files:
+            return {"error": "Could not download content from any key files."}
+
+        return {"status": "success", "files": files}
diff --git a/backend/app/agents/devrel/github/tools/documentation_generation.py b/backend/app/agents/devrel/github/tools/documentation_generation.py
@@ -1 +1,117 @@
+import logging
+import re
+import asyncio
+import aiohttp
+from typing import Dict, Any, List
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.messages import HumanMessage
+from app.core.config import settings
 
+logger = logging.getLogger(__name__)
+
+llm = ChatGoogleGenerativeAI(
+    model=settings.github_agent_model,
+    temperature=0.2,
+    google_api_key=settings.gemini_api_key,
+)
+
+OWNER_REPO_RE = re.compile(
+    r'\b([A-Za-z0-9](?:-?[A-Za-z0-9]){0,38})/([A-Za-z0-9._-]{1,100})\b'
+)
+
+async def fetch_file_content(session: aiohttp.ClientSession, url: str, headers: dict) -> str:
+    """Asynchronously fetches content from a given URL."""
+    try:
+        async with session.get(url, headers=headers) as response:
+            response.raise_for_status()
+            return await response.text()
+    except aiohttp.ClientError as e:
+        logger.warning(f"Skipping file at {url} due to error: {e}")
+        return ""
+
+async def handle_documentation_generation(query: str) -> Dict[str, Any]:
+    """
+    Handles the documentation generation tool by asynchronously fetching repository
+    content directly from the GitHub API.
+    """
+    logger.info(f"Generating documentation for query: {query}")
+
+    match = OWNER_REPO_RE.search(query)
+    if not match:
+        return {
+            "status": "error",
+            "message": "Could not parse repository owner and name. Please use the format 'owner/repo'.",
+        }
+    owner, repo = match.group(1), match.group(2)
+    logger.info(f"Parsed repository: {owner}/{repo}")
+
+    base_url = "https://api.github.com"
+    contents_url = f"{base_url}/repos/{owner}/{repo}/contents/"
+    token = settings.github_token
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github+json",
+    }
+
+    try:
+        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
+            async with session.get(contents_url, headers=headers) as response:
+                response.raise_for_status()
+                contents_data = await response.json()
+
+            files_to_download: List[Dict[str, Any]] = []
+            key_files = ["readme.md", "pyproject.toml", "requirements.txt", "package.json", "dockerfile", "main.py", "app.py", "index.js"]
+            for item in contents_data:
+                if item.get("type") == "file" and item.get("name").lower() in key_files:
+                    files_to_download.append(item)
+            
+            if not files_to_download:
+                files_to_download = [item for item in contents_data if item.get("type") == "file"][:5]
+
+            if not files_to_download:
+                return {"status": "error", "message": "No files found in the root directory."}
+
+            # async tasks to download all files in parallel
+            tasks = [fetch_file_content(session, item["download_url"], headers) for item in files_to_download]
+            file_contents_list = await asyncio.gather(*tasks)
+            
+            # Combine file names with their content
+            files = [
+                {"name": item["name"], "content": content}
+                for item, content in zip(files_to_download, file_contents_list) if content
+            ]
+
+            if not files:
+                return {"status": "error", "message": "Could not retrieve content from any key files."}
+
+    except aiohttp.ClientError as e:
+        logger.error(f"GitHub API request failed: {e}")
+        return {"status": "error", "message": f"An error occurred while fetching repository data: {e}"}
+    except Exception as e:
+        logger.error(f"An unexpected error occurred: {e}")
+        return {"status": "error", "message": f"An unexpected error occurred: {e}"}
+
+    # Generate documentation with the language model
+    try:
+        file_contents_str = "\n\n".join(
+            f"File: {file['name']}\n\n```\n{file['content']}\n```"
+            for file in files
+        )
+
+        prompt = f"""
+        Generate comprehensive documentation in Markdown for the repository: {owner}/{repo}.
+        Based on the following files:
+        {file_contents_str}
+        Please generate a README.md file that includes:
+        - A brief introduction to the project.
+        - An overview of the key files and project structure.
+        - Instructions on how to get started.
+        """
+        response = await llm.ainvoke([HumanMessage(content=prompt)])
+        documentation = response.content.strip()
+
+        return {"status": "success", "documentation": documentation}
+
+    except Exception as e:
+        logger.error(f"Error generating documentation: {e}")
+        return {"status": "error", "message": f"An error occurred during documentation generation: {e}"}
diff --git a/backend/main.py b/backend/main.py
@@ -61,14 +61,29 @@ async def start_background_tasks(self):
             raise
 
     async def test_weaviate_connection(self):
-        """Test Weaviate connection during startup."""
-        try:
-            async with get_weaviate_client() as client:
-                if await client.is_ready():
-                    logger.info("Weaviate connection successful and ready")
-        except Exception as e:
-            logger.error(f"Failed to connect to Weaviate: {e}")
-            raise
+        logger.info("Attempting to connect to Weaviate...")
+        max_retries = 5
+        retry_delay = 5
+
+        for attempt in range(max_retries):
+            try:
+                async with get_weaviate_client() as client:
+                    if await client.is_ready():
+                        logger.info("Successfully connected to Weaviate.")
+                        return
+
+                logger.warning("Weaviate client connected, but the service is not ready yet.")
+
+            except Exception as e:
+                logger.warning(f"Attempt {attempt + 1}/{max_retries} failed to connect to Weaviate: {e}")
+
+            if attempt < max_retries - 1:
+                logger.info(f"Retrying in {retry_delay} seconds...")
+                await asyncio.sleep(retry_delay)
+            else:
+                logger.error("Could not connect to Weaviate after several retries. Please ensure Docker services are running and accessible.")
+                raise ConnectionError("Failed to connect to Weaviate after multiple attempts.")
+
 
     async def stop_background_tasks(self):
         """Stops all background tasks and connections gracefully."""
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "uvicorn (>=0.35.0,<0.36.0)",
     "ddgs (>=9.0.2,<10.0.0)",
     "fastmcp>=2.11.3,<3.0.0",
+    "httpx (>=0.27.0,<0.28.0)",
     "discord-py (>=2.5.2,<3.0.0)",
 ]
 

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@ dependencies = [`
`26`	`26`	`"uvicorn (>=0.35.0,<0.36.0)",`
`27`	`27`	`"ddgs (>=9.0.2,<10.0.0)",`
`28`	`28`	`"fastmcp>=2.11.3,<3.0.0",`
	`29`	`+ "httpx (>=0.27.0,<0.28.0)",`
`29`	`30`	`"discord-py (>=2.5.2,<3.0.0)",`
`30`	`31`	`]`
`31`	`32`