Skip to content

Commit 2bf5fe2

Browse files
committed
Enable documentation generation and add repo content endpoints
Uncommented and integrated the documentation generation tool in the GitHub toolkit. Added new endpoints and service methods to fetch repository content via the MCP server, including non-recursive retrieval of key files from the repo root. Improved error handling and logging throughout. Enhanced Weaviate connection logic with retries on startup. Updated dependencies to include httpx.
1 parent ea97cbe commit 2bf5fe2

File tree

7 files changed

+250
-20
lines changed

7 files changed

+250
-20
lines changed

backend/app/agents/devrel/github/github_toolkit.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .tools.contributor_recommendation import handle_contributor_recommendation
1111
# from .tools.repository_query import handle_repo_query
1212
# from .tools.issue_creation import handle_issue_creation
13-
# from .tools.documentation_generation import handle_documentation_generation
13+
from .tools.documentation_generation import handle_documentation_generation
1414
from .tools.general_github_help import handle_general_github_help
1515
logger = logging.getLogger(__name__)
1616

@@ -124,13 +124,11 @@ async def execute(self, query: str) -> Dict[str, Any]:
124124
result = await handle_contributor_recommendation(query)
125125
elif classification == "repo_support":
126126
result = await handle_repo_query(query)
127-
# result = await handle_repo_query(query)
128127
elif classification == "issue_creation":
129128
result = "Not implemented"
130129
# result = await handle_issue_creation(query)
131130
elif classification == "documentation_generation":
132-
result = "Not implemented"
133-
# result = await handle_documentation_generation(query)
131+
result = await handle_documentation_generation(query)
134132
elif classification == "web_search":
135133
result = await handle_web_search(query)
136134
else:

backend/app/agents/devrel/github/services/github_mcp_client.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,21 @@
66
logger = logging.getLogger(__name__)
77

88
class GitHubMCPClient:
9+
"""Client for communicating with the GitHub MCP server."""
910

10-
#Client for communicating with the GitHub MCP server.
11-
12-
def __init__(self, mcp_server_url: str = "http://localhost:8001"):
13-
11+
def __init__(self, mcp_server_url: str = "http://localhost:8001", timeout: int = 15):
    """
    Initializes the client.

    Args:
        mcp_server_url: The URL of the MCP server.
        timeout: The total timeout in seconds for client requests.
    """
    self.mcp_server_url = mcp_server_url
    self.timeout = timeout  # Store the timeout value
    # No HTTP session exists yet; it is created in __aenter__, so the client
    # must be used as an async context manager before issuing requests.
    self.session: Optional[aiohttp.ClientSession] = None
1621

1722
async def __aenter__(self):
    """Open the aiohttp session used for requests and return the client itself."""
    # The configured timeout is applied as the total time budget per request.
    self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout))
    return self
2125

2226
async def __aexit__(self, exc_type, exc_val, exc_tb):
@@ -68,4 +72,20 @@ async def is_server_available(self) -> bool:
6872

6973
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
7074
logger.debug(f"Health check failed: {e}")
71-
return False
75+
return False
76+
77+
async def get_repo_content(self, owner: str, repo: str) -> Dict[str, Any]:
    """Fetch repository content via the MCP server's ``/repo_content`` endpoint.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.

    Returns:
        The decoded JSON body returned by the server, or
        ``{"error": "Communication error: ..."}`` when the request fails
        or times out.

    Raises:
        RuntimeError: If called before the session was opened with
            ``async with``.
    """
    if not self.session:
        raise RuntimeError("Client not initialized. Use async context manager.")

    payload = {"owner": owner, "repo": repo}
    try:
        async with self.session.post(
            f"{self.mcp_server_url}/repo_content", json=payload
        ) as response:
            response.raise_for_status()
            return await response.json()
    # A total-timeout expiry surfaces as asyncio.TimeoutError, which is not an
    # aiohttp.ClientError; catch both, as is_server_available does.
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        # TimeoutError usually has an empty message; fall back to its type name.
        detail = str(e) or type(e).__name__
        logger.error(f"Error communicating with MCP server for repo content: {detail}")
        return {"error": f"Communication error: {detail}"}

backend/app/agents/devrel/github/services/github_mcp_server.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,21 @@ async def get_repo_info(request: RepoInfoRequest):
9494
@app.post("/repo_content")
async def get_repo_content(request: RepoInfoRequest):
    """Return the content of key files from a repository's root directory.

    Args:
        request: Body carrying the ``owner`` and ``repo`` to inspect.

    Returns:
        The service result on success, or
        ``{"status": "error", "error": ...}`` when the service reports a
        failure in its result dict.

    Raises:
        HTTPException: 500 when the GitHub service is not configured or the
            service call raises.
    """
    # Function-scope import: only this handler needs asyncio.
    import asyncio

    # Checked outside the try block so this HTTPException reaches the client
    # unchanged instead of being caught below and re-wrapped.
    if not github_service:
        raise HTTPException(status_code=500, detail="GitHub service not available")

    try:
        # The service performs blocking HTTP calls; run it in a worker thread
        # so the event loop keeps serving other requests meanwhile.
        result = await asyncio.to_thread(
            github_service.get_repo_content, request.owner, request.repo
        )
    except Exception as e:
        logger.error(f"Error getting repo content: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    if "error" in result:
        return {"status": "error", "error": result["error"]}

    return result


# Keep the script entry point last: uvicorn.run() blocks, so any route
# declared after it is never registered when this module is run directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8001)

backend/app/agents/devrel/github/services/github_mcp_service.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,66 @@ def repo_query(self, owner: str, repo: str) -> dict:
6161
"created_at": data.get("created_at"),
6262
"updated_at": data.get("updated_at"),
6363
"pushed_at": data.get("pushed_at"),
64-
}
64+
}
65+
66+
def get_repo_content(self, owner: str, repo: str) -> dict:
    """
    Fetches the content of key files from the repository's root directory.

    Only the root listing is read (no recursion). Well-known files such as
    the README or dependency manifests are preferred; when none are present
    the first five root files are used instead.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.

    Returns:
        ``{"status": "success", "files": [{"name": ..., "content": ...}]}``
        on success, otherwise a dict containing an ``"error"`` key.
    """
    print(f"SERVICE: Getting repo content for {owner}/{repo}")

    contents_url = f"{self.base_url}/repos/{owner}/{repo}/contents/"
    headers = {
        "Authorization": f"Bearer {self.token}",
        "Accept": "application/vnd.github+json",
    }

    try:
        contents_resp = requests.get(contents_url, headers=headers, timeout=15)
        contents_resp.raise_for_status()
        contents_data = contents_resp.json()
    except requests.exceptions.RequestException as e:
        print(f"SERVICE ERROR: Failed to fetch repository contents: {e}")
        return {"error": "Failed to fetch repository contents", "message": str(e)}

    # A directory listing is a JSON array; anything else cannot be iterated
    # as a list of entries.
    if not isinstance(contents_data, list):
        print("SERVICE ERROR: Unexpected response shape for repository contents.")
        return {"error": "Unexpected response while listing repository contents."}

    key_files = {
        "readme.md", "pyproject.toml", "requirements.txt", "package.json",
        "dockerfile", "main.py", "app.py", "index.js",
    }
    root_files = [
        item for item in contents_data
        if isinstance(item, dict) and item.get("type") == "file"
    ]
    # `or ""` guards against entries whose name is missing or null.
    files_to_download = [
        item for item in root_files
        if (item.get("name") or "").lower() in key_files
    ]

    if not files_to_download:
        print("SERVICE INFO: No key files found in root. Grabbing first 5 files.")
        files_to_download = root_files[:5]

    if not files_to_download:
        print("SERVICE ERROR: No files found in the root directory.")
        return {"error": "No files found in the root directory to generate documentation from."}

    print(f"SERVICE INFO: Found {len(files_to_download)} files to download.")

    files = []
    for item in files_to_download:
        name = item.get("name") or ""
        download_url = item.get("download_url")
        if not download_url:
            continue

        try:
            file_resp = requests.get(download_url, headers=headers, timeout=15)
            file_resp.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Best effort: one unreadable file must not fail the whole request.
            print(f"SERVICE WARNING: Skipping file {name}: {e}")
            continue

        files.append({"name": name, "content": file_resp.text})
        print(f"SERVICE INFO: Successfully downloaded {name}")

    if not files:
        return {"error": "Could not download content from any key files."}

    return {"status": "success", "files": files}
backend/app/agents/devrel/github/tools/documentation_generation.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,117 @@
1+
import logging
2+
import re
3+
import asyncio
4+
import aiohttp
5+
from typing import Dict, Any, List
6+
from langchain_google_genai import ChatGoogleGenerativeAI
7+
from langchain_core.messages import HumanMessage
8+
from app.core.config import settings
19

10+
logger = logging.getLogger(__name__)
11+
12+
# Module-level chat model shared by the documentation tool. It is constructed
# at import time from application settings, with a low temperature (0.2).
llm = ChatGoogleGenerativeAI(
    model=settings.github_agent_model,
    temperature=0.2,
    google_api_key=settings.gemini_api_key,
)
17+
18+
# Extracts "owner/repo" from free text or from a GitHub URL.
# group(1) is the owner (1-39 alphanumerics with single interior hyphens),
# group(2) is the repository name.
# The leading lookbehinds make the match start either right after
# "github.com/" or at a token boundary, so "https://github.com/owner/repo"
# yields owner/repo rather than "com/owner".
OWNER_REPO_RE = re.compile(
    r'(?:(?<=(?i:github\.com/))|(?<![\w./-]))'
    r'([A-Za-z0-9](?:-?[A-Za-z0-9]){0,38})/([A-Za-z0-9._-]{1,100})\b'
)
21+
22+
async def fetch_file_content(session: aiohttp.ClientSession, url: str, headers: dict) -> str:
    """Asynchronously fetch the text content at ``url``.

    This is a best-effort helper: any failure for this one file is logged
    and reported as an empty string so that a concurrent batch of downloads
    is not aborted by a single bad file.

    Args:
        session: Open aiohttp session to issue the request with.
        url: URL of the file to download.
        headers: HTTP headers to send with the request.

    Returns:
        The response body as text, or ``""`` if the download failed.
    """
    try:
        async with session.get(url, headers=headers) as response:
            response.raise_for_status()
            return await response.text()
    # Timeouts are not aiohttp.ClientError, and decoding a binary file can
    # raise UnicodeDecodeError; both should skip the file, not fail the batch.
    except (aiohttp.ClientError, asyncio.TimeoutError, UnicodeDecodeError) as e:
        logger.warning(f"Skipping file at {url} due to error: {e or type(e).__name__}")
        return ""
31+
32+
async def handle_documentation_generation(query: str) -> Dict[str, Any]:
    """Generate README-style documentation for the repository named in ``query``.

    The repository is identified by the first ``owner/repo`` match in the
    query. Key files from the repository root are downloaded concurrently
    from the GitHub API and handed to the language model.

    Args:
        query: Free-text user request containing ``owner/repo``.

    Returns:
        ``{"status": "success", "documentation": <markdown>}`` on success,
        otherwise ``{"status": "error", "message": <reason>}``.
    """
    logger.info(f"Generating documentation for query: {query}")

    match = OWNER_REPO_RE.search(query)
    if not match:
        return {
            "status": "error",
            "message": "Could not parse repository owner and name. Please use the format 'owner/repo'.",
        }
    owner, repo = match.group(1), match.group(2)
    logger.info(f"Parsed repository: {owner}/{repo}")

    base_url = "https://api.github.com"
    contents_url = f"{base_url}/repos/{owner}/{repo}/contents/"
    headers = {"Accept": "application/vnd.github+json"}
    token = settings.github_token
    if token:
        # Only authenticate when a token is configured; a literal
        # "Bearer None" header would get public repositories rejected.
        headers["Authorization"] = f"Bearer {token}"

    key_files = {
        "readme.md", "pyproject.toml", "requirements.txt", "package.json",
        "dockerfile", "main.py", "app.py", "index.js",
    }

    try:
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
            async with session.get(contents_url, headers=headers) as response:
                response.raise_for_status()
                contents_data = await response.json()

            # A directory listing is a JSON array; anything else cannot be
            # treated as a list of entries.
            if not isinstance(contents_data, list):
                return {"status": "error", "message": "Unexpected response while listing repository contents."}

            # Keep only entries that can actually be downloaded and named.
            root_files: List[Dict[str, Any]] = [
                item for item in contents_data
                if isinstance(item, dict)
                and item.get("type") == "file"
                and item.get("name")
                and item.get("download_url")
            ]
            files_to_download = [
                item for item in root_files if item["name"].lower() in key_files
            ]
            if not files_to_download:
                # No well-known files: fall back to the first few root files.
                files_to_download = root_files[:5]

            if not files_to_download:
                return {"status": "error", "message": "No files found in the root directory."}

            # Download all selected files in parallel.
            file_contents_list = await asyncio.gather(
                *(fetch_file_content(session, item["download_url"], headers) for item in files_to_download)
            )

            # Pair names with content, dropping files whose download failed.
            files = [
                {"name": item["name"], "content": content}
                for item, content in zip(files_to_download, file_contents_list)
                if content
            ]

            if not files:
                return {"status": "error", "message": "Could not retrieve content from any key files."}

    except asyncio.TimeoutError:
        # TimeoutError has an empty message, so report it explicitly.
        logger.error("GitHub API request timed out")
        return {"status": "error", "message": "An error occurred while fetching repository data: request timed out"}
    except aiohttp.ClientError as e:
        logger.error(f"GitHub API request failed: {e}")
        return {"status": "error", "message": f"An error occurred while fetching repository data: {e}"}
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return {"status": "error", "message": f"An unexpected error occurred: {e}"}

    # Generate documentation with the language model.
    try:
        file_contents_str = "\n\n".join(
            f"File: {file['name']}\n\n```\n{file['content']}\n```"
            for file in files
        )

        prompt = f"""
        Generate comprehensive documentation in Markdown for the repository: {owner}/{repo}.
        Based on the following files:
        {file_contents_str}
        Please generate a README.md file that includes:
        - A brief introduction to the project.
        - An overview of the key files and project structure.
        - Instructions on how to get started.
        """
        response = await llm.ainvoke([HumanMessage(content=prompt)])
        documentation = response.content.strip()

        return {"status": "success", "documentation": documentation}

    except Exception as e:
        logger.error(f"Error generating documentation: {e}")
        return {"status": "error", "message": f"An error occurred during documentation generation: {e}"}

backend/main.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,29 @@ async def start_background_tasks(self):
6161
raise
6262

6363
async def test_weaviate_connection(self):
    """Test the Weaviate connection during startup, retrying on failure.

    Makes up to five attempts, five seconds apart, and returns as soon as
    the client reports ready.

    Raises:
        ConnectionError: If Weaviate is not ready after all attempts. The
            last underlying exception, if any, is chained as the cause.
    """
    logger.info("Attempting to connect to Weaviate...")
    max_retries = 5
    retry_delay = 5
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            async with get_weaviate_client() as client:
                if await client.is_ready():
                    logger.info("Successfully connected to Weaviate.")
                    return

            logger.warning("Weaviate client connected, but the service is not ready yet.")

        except Exception as e:
            # Remember the failure so the final error can report its cause.
            last_error = e
            logger.warning(f"Attempt {attempt}/{max_retries} failed to connect to Weaviate: {e}")

        if attempt < max_retries:
            logger.info(f"Retrying in {retry_delay} seconds...")
            await asyncio.sleep(retry_delay)

    logger.error("Could not connect to Weaviate after several retries. Please ensure Docker services are running and accessible.")
    raise ConnectionError("Failed to connect to Weaviate after multiple attempts.") from last_error
86+
7287

7388
async def stop_background_tasks(self):
7489
"""Stops all background tasks and connections gracefully."""

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dependencies = [
2626
"uvicorn (>=0.35.0,<0.36.0)",
2727
"ddgs (>=9.0.2,<10.0.0)",
2828
"fastmcp>=2.11.3,<3.0.0",
29+
"httpx (>=0.27.0,<0.28.0)",
2930
"discord-py (>=2.5.2,<3.0.0)",
3031
]
3132

0 commit comments

Comments
 (0)