Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ DISCORD_BOT_TOKEN=
# ENABLE_DISCORD_BOT=true

GITHUB_TOKEN=
# Default GitHub organization (owner) used when a query does not name one
GITHUB_ORG=

# EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
# EMBEDDING_MAX_BATCH_SIZE=32
Expand Down
69 changes: 38 additions & 31 deletions backend/app/agents/devrel/github/github_toolkit.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
import logging
import os
import json
import re
from typing import Dict, Any
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from app.core.config import settings
from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
from .tools.search import handle_web_search
# TODO: Implement all tools
from .tools.github_support import handle_github_supp
from .tools.contributor_recommendation import handle_contributor_recommendation
# from .tools.repository_query import handle_repo_query
# from .tools.issue_creation import handle_issue_creation
# from .tools.documentation_generation import handle_documentation_generation
from .tools.general_github_help import handle_general_github_help

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Organization read from the GITHUB_ORG environment variable at import time;
# None when the variable is unset. normalize_org() falls back to this value.
DEFAULT_ORG = os.getenv("GITHUB_ORG")


def normalize_org(org_from_user: str = None) -> str:
    """Resolve which organization to use.

    A non-blank ``org_from_user`` wins and is returned with surrounding
    whitespace removed; otherwise the module-level ``DEFAULT_ORG`` is
    returned unchanged.
    """
    trimmed = org_from_user.strip() if org_from_user else ""
    return trimmed or DEFAULT_ORG


class GitHubToolkit:
"""
Expand All @@ -32,30 +42,37 @@ def __init__(self):
"web_search",
"contributor_recommendation",
"repo_support",
"github_support",
"issue_creation",
"documentation_generation",
"find_good_first_issues",
"general_github_help"
]

async def classify_intent(self, user_query: str) -> Dict[str, Any]:
"""
Classify intent and return classification with reasoning.

Args:
user_query: The user's request or question

Returns:
Dictionary containing classification, reasoning, and confidence
"""
"""Classify intent and return classification with reasoning."""
logger.info(f"Classifying intent for query: {user_query[:100]}")

try:
prompt = GITHUB_INTENT_ANALYSIS_PROMPT.format(user_query=user_query)
response = await self.llm.ainvoke([HumanMessage(content=prompt)])

import json
result = json.loads(response.content.strip())
content = response.content.strip()

try:
result = json.loads(content)
except json.JSONDecodeError:
match = re.search(r"\{.*\}", content, re.DOTALL)
if match:
result = json.loads(match.group())
else:
logger.error(f"Invalid JSON in LLM response: {content}")
return {
"classification": "general_github_help",
"reasoning": "Failed to parse LLM response as JSON",
"confidence": "low",
"query": user_query
}

classification = result.get("classification")
if classification not in self.tools:
Expand All @@ -65,21 +82,12 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:

result["query"] = user_query

logger.info(f"Classified intent as for query: {user_query} is: {classification}")
logger.info(f"Classified intent for query: {user_query} -> {classification}")
logger.info(f"Reasoning: {result.get('reasoning', 'No reasoning provided')}")
logger.info(f"Confidence: {result.get('confidence', 'unknown')}")

return result

except json.JSONDecodeError as e:
logger.error(f"Error parsing JSON response from LLM: {str(e)}")
logger.error(f"Raw response: {response.content}")
return {
"classification": "general_github_help",
"reasoning": f"Failed to parse LLM response: {str(e)}",
"confidence": "low",
"query": user_query
}
except Exception as e:
logger.error(f"Error in intent classification: {str(e)}")
return {
Expand All @@ -90,9 +98,7 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
}

async def execute(self, query: str) -> Dict[str, Any]:
"""
Main execution method - classifies intent and delegates to appropriate tools
"""
"""Main execution method - classifies intent and delegates to appropriate tools"""
logger.info(f"Executing GitHub toolkit for query: {query[:100]}")

try:
Expand All @@ -103,15 +109,16 @@ async def execute(self, query: str) -> Dict[str, Any]:

if classification == "contributor_recommendation":
result = await handle_contributor_recommendation(query)
elif classification == "github_support":
org = normalize_org()
result = await handle_github_supp(query, org=org)
result["org_used"] = org
elif classification == "repo_support":
result = "Not implemented"
# result = await handle_repo_query(query)
elif classification == "issue_creation":
result = "Not implemented"
# result = await handle_issue_creation(query)
elif classification == "documentation_generation":
result = "Not implemented"
# result = await handle_documentation_generation(query)
elif classification == "web_search":
result = await handle_web_search(query)
else:
Expand Down
24 changes: 19 additions & 5 deletions backend/app/agents/devrel/github/prompts/intent_analysis.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
GITHUB_INTENT_ANALYSIS_PROMPT = """You are an expert GitHub DevRel AI assistant. Analyze the user query and classify the intent.

AVAILABLE FUNCTIONS:
- web_search: Search the web for information
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
- github_support: Questions about repository information, structure, stats, issues, stars, forks, description, or any repository metadata
- web_search: Search the web for general information
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
- repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
- issue_creation: Creating bug reports, feature requests, or tracking items
- documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
Expand All @@ -12,24 +13,37 @@
USER QUERY: {user_query}

Classification guidelines:
- github_support:
- ALWAYS classify as `github_support` if the query asks about:
- repository information
- stats (stars, forks, watchers, issues)
- open issues, closed issues, or "what issues"
- description, license, URL, metadata
- any question containing "<repo> repo", "repository", "repo", "issues in", "stars in", "forks in"
- Example queries:
- "What all issues are in Dev.ai repo?" → github_support
- "How many stars does Devr.AI repo have?" → github_support
- "Show me forks of Aossie-org/Dev.ai" → github_support
- contributor_recommendation:
* "who should review this PR/issue?"
* "find experts in React/Python/ML"
* "recommend assignees for stripe integration"
* "best people for database optimization"
* URLs like github.com/owner/repo/issues/123
* "I need help with RabbitMQ, can you suggest some people?"
- repo_support: Code structure, dependencies, impact analysis, architecture
- repo_support: Code structure, dependencies, impact analysis, architecture
- issue_creation: Creating bugs, features, tracking items
- documentation_generation: Docs, READMEs, guides, explanations
- find_good_first_issues: Beginners, newcomers, "good first issue"
- web_search: General information needing external search
- web_search: Only for information that cannot be found through GitHub API (like news, articles, external documentation)
- general_github_help: General GitHub questions not covered above

IMPORTANT: Repository information queries (issues count, stars, forks, description) should ALWAYS use github_support, not web_search.

CRITICAL: Return ONLY raw JSON. No markdown, no code blocks, no explanation text.

{{
"classification": "function_name_from_list_above",
"reasoning": "Brief explanation of why you chose this function",
"confidence": "high|medium|low"
}}"""
}}"""
101 changes: 101 additions & 0 deletions backend/app/agents/devrel/github/services/github_mcp_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import logging
import os
from typing import Dict, Any, Optional, List, Union
import aiohttp
import asyncio

# Module-level logger used by GitHubMCPClient for error and debug output.
logger = logging.getLogger(__name__)

class GitHubMCPClient:
    """Async HTTP client for the GitHub MCP server.

    Use it as an async context manager: the aiohttp session is created on
    entry and closed on exit. The request methods return the server's
    ``data`` payload on success, or a dict holding a single ``"error"`` key
    when the server or the transport reports a failure.
    """

    def __init__(self, mcp_server_url: str = "http://localhost:8001"):
        """Store the server base URL; no network activity happens here."""
        self.mcp_server_url = mcp_server_url
        # Created in __aenter__; stays None until the context is entered.
        self.session: Optional[aiohttp.ClientSession] = None
        # Default repository owner, pulled from the GITHUB_ORG env variable.
        self.org = os.getenv("GITHUB_ORG", "Aossie-org")

    async def __aenter__(self):
        """Open the HTTP session (15 s total timeout) and return the client."""
        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15))
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    async def _post(self, endpoint: str, payload: Dict[str, Any], empty: Any) -> Any:
        """POST ``payload`` as JSON to ``endpoint`` and unwrap the reply envelope.

        Returns the envelope's ``data`` (``empty`` when absent) if the server
        answers 200 with ``status == "success"``; otherwise an ``{"error": ...}``
        dict. Raises RuntimeError when no session has been opened.
        """
        if not self.session:
            raise RuntimeError("Client not initialized. Use async context manager.")

        try:
            async with self.session.post(
                f"{self.mcp_server_url}/{endpoint}",
                json=payload,
                headers={"Content-Type": "application/json"},
            ) as response:
                if response.status != 200:
                    logger.error("MCP server error: %s", response.status)
                    return {"error": f"MCP server error: {response.status}"}

                result = await response.json()
                if result.get("status") != "success":
                    return {"error": result.get("error", "Unknown error")}
                return result.get("data", empty)
        except aiohttp.ClientError as e:
            logger.exception("Error communicating with MCP server: %s", e)
            return {"error": f"Communication error: {str(e)}"}
        except Exception as e:
            # Best-effort boundary: callers always get an error dict back.
            logger.exception("Unexpected error: %s", e)
            return {"error": f"Unexpected error: {str(e)}"}

    async def get_github_supp(self, repo: str, owner: Optional[str] = None) -> Dict[str, Any]:
        """Fetch metadata for a single repository.

        Args:
            repo: Repository name.
            owner: Repository owner; defaults to the org taken from the
                environment when not provided.

        Returns:
            The repository data dict, or ``{"error": ...}`` on failure.

        Raises:
            RuntimeError: If the client is used outside its async context.
        """
        return await self._post(
            "github_support",
            {"owner": owner or self.org, "repo": repo},
            {},
        )

    async def list_org_repos(self, org: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """List the repositories of ``org``.

        Returns:
            The server's ``data`` payload (an empty list when absent), or
            ``{"error": ...}`` on failure.

        Raises:
            RuntimeError: If the client is used outside its async context.
        """
        return await self._post("list_org_repos", {"org": org}, [])

    async def is_server_available(self) -> bool:
        """Health check: True only when GET /health answers HTTP 200."""
        if not self.session:
            return False

        try:
            async with self.session.get(
                f"{self.mcp_server_url}/health",
                # aiohttp documents the per-request timeout as a ClientTimeout
                # object rather than a bare number of seconds.
                timeout=aiohttp.ClientTimeout(total=5),
            ) as response:
                return response.status == 200
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logger.debug("Health check failed: %s", e)
            return False
91 changes: 91 additions & 0 deletions backend/app/agents/devrel/github/services/github_mcp_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import logging
import asyncio
from dotenv import load_dotenv, find_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from .github_mcp_service import GitHubMCPService
from typing import Optional

# Load environment variables: use the .env located from the current working
# directory when one is found, otherwise fall back to a plain load_dotenv().
dotenv_path = find_dotenv(usecwd=True)
if not dotenv_path:
    load_dotenv()
else:
    load_dotenv(dotenv_path=dotenv_path)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="GitHub MCP Server", version="1.0.0")

# Default repository owner; without it every request must name its owner.
GITHUB_ORG = os.getenv("GITHUB_ORG")
if not GITHUB_ORG:
    logger.warning("GITHUB_ORG not set in .env — defaulting to manual owner input")

# Remains None when construction fails, in which case the endpoints that
# need it answer with 503.
github_service: Optional[GitHubMCPService] = None
try:
    token = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN")
    if not token:
        logger.warning("GITHUB_TOKEN/GH_TOKEN not set; GitHub API calls may be rate-limited or fail.")
    github_service = GitHubMCPService(token=token)
    logger.info("GitHub service initialized successfully")
except Exception:
    logger.exception("Failed to initialize GitHub service")
    github_service = None

class RepoInfoRequest(BaseModel):
    """Request body for POST /github_support."""
    # Repository name (required).
    repo: str
    # Repository owner; when omitted the endpoint falls back to GITHUB_ORG.
    owner: Optional[str] = None

class RepoInfoResponse(BaseModel):
    """Response envelope returned by POST /github_support."""
    # "success" or "error".
    status: str
    # Repository details on success; an empty dict on error.
    data: dict
    # Error message on failure. Declared Optional because the default (and
    # the value on every successful response) is None, which is not a str.
    error: Optional[str] = None

@app.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "healthy", "service": "github-mcp"}
    return payload

class OrgInfoRequest(BaseModel):
    """Request body for POST /list_org_repos."""
    # Organization whose repositories should be listed (required).
    org: str

@app.post("/list_org_repos")
async def list_org_repos(request: OrgInfoRequest):
    """List the repositories of ``request.org``.

    Returns a ``{"status": ..., "data": ...}`` envelope; when the service
    result carries an ``"error"`` key the envelope has status ``"error"``,
    empty data and the error message.

    Raises:
        HTTPException: 503 when the GitHub service failed to initialize,
            500 when the lookup raises unexpectedly.
    """
    # Checked before the try block: raised inside it, the 503 would be caught
    # by the generic handler below and reported to the client as a 500.
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    try:
        # The service call is synchronous, so run it in a worker thread.
        result = await asyncio.to_thread(github_service.list_org_repos, request.org)

        if "error" in result:
            return {"status": "error", "data": {}, "error": result["error"]}

        return {"status": "success", "data": result}

    except Exception as e:
        logger.exception("Error listing org repos")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/github_support")
async def get_github_supp(request: RepoInfoRequest):
    """Return details for the repository named in ``request.repo``.

    The owner is ``request.owner`` when supplied, otherwise the GITHUB_ORG
    default. Answers 503 when the GitHub service is unavailable, 400 when no
    owner can be determined, and 500 when the lookup raises unexpectedly.
    """
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    owner = request.owner if request.owner else GITHUB_ORG
    if not owner:
        raise HTTPException(status_code=400, detail="Missing owner; provide 'owner' or set GITHUB_ORG")

    try:
        # The service call is synchronous, so run it in a worker thread.
        result = await asyncio.to_thread(github_service.repo_query, owner, request.repo)
        if "error" not in result:
            return RepoInfoResponse(status="success", data=result)
        return RepoInfoResponse(status="error", data={}, error=result["error"])
    except Exception as e:
        logger.exception("Error getting repo info")
        raise HTTPException(status_code=500, detail=str(e))

# Script entry point: serve the FastAPI app with uvicorn on 0.0.0.0:8001.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8001)
Loading