Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ DISCORD_BOT_TOKEN=
# ENABLE_DISCORD_BOT=true

GITHUB_TOKEN=
# Add Org Name here
GITHUB_ORG=

# EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
# EMBEDDING_MAX_BATCH_SIZE=32
Expand Down
37 changes: 36 additions & 1 deletion backend/app/agents/devrel/github/github_toolkit.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import os
from typing import Any, Dict, Optional

from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

from app.core.config import settings

from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
from .tools.github_support import handle_github_supp
from .tools.search import handle_web_search
# TODO: Implement all tools
from .tools.contributor_recommendation import handle_contributor_recommendation
# from .tools.repository_query import handle_repo_query
Expand All @@ -13,6 +15,16 @@
from .tools.general_github_help import handle_general_github_help
logger = logging.getLogger(__name__)

# Organization used when a request does not name one. Normalized to a
# stripped string so that an unset, empty (``GITHUB_ORG=``) or
# whitespace-only value all collapse to "" instead of leaking ``None``
# through a function annotated as returning ``str``.
DEFAULT_ORG = (os.getenv("GITHUB_ORG") or "").strip()


def normalize_org(org_from_user: Optional[str] = None) -> str:
    """Return the organization to query, preferring the caller's value.

    Args:
        org_from_user: Organization name supplied by the user, if any.

    Returns:
        ``org_from_user`` with surrounding whitespace removed when it is
        non-blank; otherwise ``DEFAULT_ORG`` (the ``GITHUB_ORG`` environment
        variable, or "" when that variable is unset or blank).
    """
    user_org = (org_from_user or "").strip()
    return user_org or DEFAULT_ORG

Comment on lines +17 to +25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

DEFAULT_ORG may be None; align env keys and guarantee a string.

Make DEFAULT_ORG robust and consistent with .env.

-DEFAULT_ORG = os.getenv("GITHUB_ORG")
+DEFAULT_ORG = (
+    os.getenv("GITHUB_ORG")
+    or os.getenv("GITHUB_DEFAULT_ORG")
+    or "Aossie-org"
+)
@@
-def normalize_org(org_from_user: str = None) -> str:
+from typing import Optional
+
+def normalize_org(org_from_user: Optional[str] = None) -> str:
@@
-    if org_from_user and org_from_user.strip():
-        return org_from_user.strip()
-    return DEFAULT_ORG
+    user_org = (org_from_user or "").strip() if org_from_user else ""
+    return user_org or DEFAULT_ORG
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
DEFAULT_ORG = os.getenv("GITHUB_ORG")
def normalize_org(org_from_user: str = None) -> str:
"""
Always fallback to env org if user does not specify one.
"""
if org_from_user and org_from_user.strip():
return org_from_user.strip()
return DEFAULT_ORG
DEFAULT_ORG = (
os.getenv("GITHUB_ORG")
or os.getenv("GITHUB_DEFAULT_ORG")
or "Aossie-org"
)
from typing import Optional
def normalize_org(org_from_user: Optional[str] = None) -> str:
"""
Always fallback to env org if user does not specify one.
"""
user_org = (org_from_user or "").strip() if org_from_user else ""
return user_org or DEFAULT_ORG
🧰 Tools
🪛 Ruff (0.12.2)

20-20: PEP 484 prohibits implicit Optional

Convert to T | None

(RUF013)

🤖 Prompt for AI Agents
In backend/app/agents/devrel/github/github_toolkit.py around lines 18 to 27,
DEFAULT_ORG can be None and may not match .env expectations — change its
initialization to read the exact .env key and guarantee a string (e.g.,
DEFAULT_ORG = os.getenv("GITHUB_ORG", "") then .strip()), and update
normalize_org to return DEFAULT_ORG (stripped) when the user value is empty so
the function always returns a string; ensure you trim whitespace on both sources
before returning.


class GitHubToolkit:
"""
Expand All @@ -32,6 +44,7 @@ def __init__(self):
"web_search",
"contributor_recommendation",
"repo_support",
"github_support",
"issue_creation",
"documentation_generation",
"find_good_first_issues",
Expand All @@ -55,7 +68,25 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
response = await self.llm.ainvoke([HumanMessage(content=prompt)])

import json
result = json.loads(response.content.strip())
import re

content = response.content.strip()

candidates = []
cb = re.search(r'```(?:json)?\s*({[\s\S]*?})\s*```', content, flags=re.IGNORECASE)
if cb:
candidates.append(cb.group(1))
candidates.extend(m.group(0) for m in re.finditer(r'\{[\s\S]*?\}', content))

result = None
for payload in candidates:
try:
result = json.loads(payload)
break
except json.JSONDecodeError:
continue
if result is None:
raise json.JSONDecodeError("No valid JSON object found in LLM response", content, 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please elaborate on what this regex is for? It seems to me that it is for extracting the repo/org from the user query.
But won't that work without this specific regex?

I'm a bit confused because github_support.py also has a regex defined.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, this regex is different from the one in github_support.py. The github_support regex extracts the repo/org from the user query, whereas this toolkit regex takes the LLM response (ReAct supervisor) and extracts JSON payloads from it. Without it, the pipeline would fail on a slightly malformed response; basically, it is a safeguard for structured-output parsing.

Copy link
Contributor

@smokeyScraper smokeyScraper Sep 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I get it. But the previous JSON logic works pretty well. Did you hit any case where it didn't work? Initially I did, because the model used to output a ```json{}-like structure, but that was fixed by changing the prompt. So I guess there is no need for this. Can you please revert this change, @DhruvK278?


classification = result.get("classification")
if classification not in self.tools:
Expand Down Expand Up @@ -103,6 +134,10 @@ async def execute(self, query: str) -> Dict[str, Any]:

if classification == "contributor_recommendation":
result = await handle_contributor_recommendation(query)
elif classification == "github_support":
org = normalize_org()
result = await handle_github_supp(query, org=org)
result["org_used"] = org
elif classification == "repo_support":
result = "Not implemented"
# result = await handle_repo_query(query)
Expand Down
24 changes: 19 additions & 5 deletions backend/app/agents/devrel/github/prompts/intent_analysis.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
GITHUB_INTENT_ANALYSIS_PROMPT = """You are an expert GitHub DevRel AI assistant. Analyze the user query and classify the intent.

AVAILABLE FUNCTIONS:
- web_search: Search the web for information
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
- github_support: Questions about repository information, structure, stats, issues, stars, forks, description, or any repository metadata
- web_search: Search the web for general information
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
- repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
- issue_creation: Creating bug reports, feature requests, or tracking items
- documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
Expand All @@ -12,24 +13,37 @@
USER QUERY: {user_query}

Classification guidelines:
- github_support:
- ALWAYS classify as `github_support` if the query asks about:
- repository information
- stats (stars, forks, watchers, issues)
- open issues, closed issues, or "what issues"
- description, license, URL, metadata
- any question containing "<repo> repo", "repository", "repo", "issues in", "stars in", "forks in"
- Example queries:
- "What all issues are in Dev.ai repo?" → github_support
- "How many stars does Devr.AI repo have?" → github_support
- "Show me forks of Aossie-org/Dev.ai" → github_support
- contributor_recommendation:
* "who should review this PR/issue?"
* "find experts in React/Python/ML"
* "recommend assignees for stripe integration"
* "best people for database optimization"
* URLs like github.com/owner/repo/issues/123
* "I need help with RabbitMQ, can you suggest some people?"
- repo_support: Code structure, dependencies, impact analysis, architecture
- repo_support: Code structure, dependencies, impact analysis, architecture
- issue_creation: Creating bugs, features, tracking items
- documentation_generation: Docs, READMEs, guides, explanations
- find_good_first_issues: Beginners, newcomers, "good first issue"
- web_search: General information needing external search
- web_search: Only for information that cannot be found through GitHub API (like news, articles, external documentation)
- general_github_help: General GitHub questions not covered above

IMPORTANT: Repository information queries (issues count, stars, forks, description) should ALWAYS use github_support, not web_search.

CRITICAL: Return ONLY raw JSON. No markdown, no code blocks, no explanation text.

{{
"classification": "function_name_from_list_above",
"reasoning": "Brief explanation of why you chose this function",
"confidence": "high|medium|low"
}}"""
}}"""
105 changes: 105 additions & 0 deletions backend/app/agents/devrel/github/services/github_mcp_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging
import os
from typing import Dict, Any, Optional, List, Union
import aiohttp
import asyncio

logger = logging.getLogger(__name__)

class GitHubMCPClient:
"""
Client for communicating with the GitHub MCP server.
"""

def __init__(self, mcp_server_url: str = "http://localhost:8001"):
    """Store connection settings; no session is opened here.

    Args:
        mcp_server_url: Base URL of the GitHub MCP server.
    """
    self.mcp_server_url = mcp_server_url
    # Assigned by ``__aenter__``; ``None`` means the client was not entered.
    self.session: Optional[aiohttp.ClientSession] = None
    # Default org pulled from environment. ``or`` (rather than a getenv
    # default) also covers GITHUB_ORG being set but empty, which would
    # otherwise produce an empty default owner.
    self.org = os.getenv("GITHUB_ORG") or "Aossie-org"

async def __aenter__(self):
    """Create the HTTP session used by the request methods and return the client."""
    # Async context manager entry: the session carries a 15-second total timeout.
    request_budget = aiohttp.ClientTimeout(total=15)
    self.session = aiohttp.ClientSession(timeout=request_budget)
    return self

async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Close the HTTP session when leaving the ``async with`` block.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    # Async context manager exit. Detach the session before closing it so the
    # client reads as "not initialized" afterwards (the request methods guard
    # on ``self.session``), even if close() itself raises.
    session, self.session = self.session, None
    if session:
        await session.close()

async def get_github_supp(self, repo: str, owner: Optional[str] = None) -> Dict[str, Any]:
    """
    Fetch metadata for a single repository.
    Owner defaults to org from environment if not provided.

    Args:
        repo: Repository name.
        owner: Repository owner; falls back to ``self.org`` when falsy.

    Returns:
        The ``data`` field of the server reply when it reports
        ``status == "success"``; otherwise a dict of the form
        ``{"error": message}``. Transport failures are reported the same
        way rather than raised.

    Raises:
        RuntimeError: If the client has no session (not used via
            ``async with``).
    """
    if not self.session:
        raise RuntimeError("Client not initialized. Use async context manager.")

    payload = {"owner": owner or self.org, "repo": repo}

    try:
        async with self.session.post(
            f"{self.mcp_server_url}/github_support",
            json=payload,
            headers={"Content-Type": "application/json"},
        ) as response:
            if response.status != 200:
                logger.error("MCP server error: %s", response.status)
                return {"error": f"MCP server error: {response.status}"}

            result = await response.json()
            if result.get("status") == "success":
                return result.get("data", {})
            return {"error": result.get("error", "Unknown error")}

    except asyncio.TimeoutError:
        # A timeout is not an aiohttp.ClientError and stringifies to "", so
        # it gets its own branch with an explicit message instead of
        # surfacing as "Unexpected error: ".
        logger.exception("Timed out waiting for MCP server")
        return {"error": "Communication error: request to MCP server timed out"}
    except aiohttp.ClientError as e:
        logger.exception("Error communicating with MCP server: %s", e)
        return {"error": f"Communication error: {str(e)}"}
    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        logger.exception("Unexpected error: %s", e)
        return {"error": f"Unexpected error: {str(e)}"}

async def list_org_repos(self, org: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """
    List the repositories of an organization via the MCP server.

    Args:
        org: Organization name sent to the ``/list_org_repos`` endpoint.

    Returns:
        The ``data`` field of the server reply (``[]`` when absent) when it
        reports ``status == "success"``; otherwise a dict of the form
        ``{"error": message}``. Transport failures are reported the same
        way rather than raised.

    Raises:
        RuntimeError: If the client has no session (not used via
            ``async with``).
    """
    if not self.session:
        raise RuntimeError("Client not initialized. Use async context manager.")

    try:
        async with self.session.post(
            f"{self.mcp_server_url}/list_org_repos",
            json={"org": org},
            headers={"Content-Type": "application/json"},
        ) as response:
            if response.status != 200:
                logger.error("MCP server error: %s", response.status)
                return {"error": f"MCP server error: {response.status}"}

            result = await response.json()
            if result.get("status") == "success":
                return result.get("data", [])
            return {"error": result.get("error", "Unknown error")}

    except asyncio.TimeoutError:
        # A timeout is not an aiohttp.ClientError and stringifies to "", so
        # it gets its own branch with an explicit message instead of
        # surfacing as "Unexpected error: ".
        logger.exception("Timed out waiting for MCP server")
        return {"error": "Communication error: request to MCP server timed out"}
    except aiohttp.ClientError as e:
        # logger.exception (not logger.error) keeps the traceback, matching
        # the error handling in get_github_supp.
        logger.exception("Error communicating with MCP server: %s", e)
        return {"error": f"Communication error: {str(e)}"}
    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        logger.exception("Unexpected error: %s", e)
        return {"error": f"Unexpected error: {str(e)}"}


async def is_server_available(self) -> bool:
"""
Health check for MCP server.
"""
if not self.session:
return False

try:
async with self.session.get(f"{self.mcp_server_url}/health", timeout=5) as response:
return response.status == 200
Comment on lines +97 to +98
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Request timeout misuse: aiohttp expects ClientTimeout, not an int.

Passing timeout=5 raises TypeError at runtime. Use ClientTimeout or rely on the session default.

-            async with self.session.get(f"{self.mcp_server_url}/health", timeout=5) as response:
+            async with self.session.get(
+                f"{self.mcp_server_url}/health",
+                timeout=aiohttp.ClientTimeout(total=5),
+            ) as response:
                 return response.status == 200
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async with self.session.get(f"{self.mcp_server_url}/health", timeout=5) as response:
return response.status == 200
async with self.session.get(
f"{self.mcp_server_url}/health",
timeout=aiohttp.ClientTimeout(total=5),
) as response:
return response.status == 200
🤖 Prompt for AI Agents
In backend/app/agents/devrel/github/services/github_mcp_client.py around lines
70 to 71, the call to self.session.get(..., timeout=5) is incorrect because
aiohttp expects a ClientTimeout object (or no timeout) not an int; update the
call to pass aiohttp.ClientTimeout(total=5) by importing ClientTimeout from
aiohttp (or remove the timeout argument to use the session default) so the
request uses a proper ClientTimeout instance and avoids the TypeError.

except (aiohttp.ClientError, asyncio.TimeoutError) as e:
logger.debug(f"Health check failed: {e}")
return False
91 changes: 91 additions & 0 deletions backend/app/agents/devrel/github/services/github_mcp_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import logging
import asyncio
from dotenv import load_dotenv, find_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from .github_mcp_service import GitHubMCPService
from typing import Optional

# Load environment variables from a .env file before anything below reads
# them. The search is asked to start from the current working directory
# (usecwd=True); a falsy result means no file was located.
dotenv_path = find_dotenv(usecwd=True)
if dotenv_path:
    load_dotenv(dotenv_path=dotenv_path)
else:
    # Nothing found from the CWD: fall back to load_dotenv()'s default lookup.
    load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="GitHub MCP Server", version="1.0.0")

# Load env vars
# GITHUB_ORG is the fallback owner for /github_support requests that omit one.
GITHUB_ORG = os.getenv("GITHUB_ORG")
if not GITHUB_ORG:
    logger.warning("GITHUB_ORG not set in .env — defaulting to manual owner input")

# Shared service instance. It stays None when construction fails so the
# endpoints can answer with an HTTP error instead of the module failing to
# import.
github_service: Optional[GitHubMCPService] = None
try:
    # Either variable name is accepted; a missing token only logs a warning
    # and the service is still constructed with token=None.
    token = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN")
    if not token:
        logger.warning("GITHUB_TOKEN/GH_TOKEN not set; GitHub API calls may be rate-limited or fail.")
    github_service = GitHubMCPService(token=token)
    logger.info("GitHub service initialized successfully")
except Exception as e:
    # logger.exception records the traceback of the failed construction.
    logger.exception("Failed to initialize GitHub service")
    github_service = None

# Request body for POST /github_support.
class RepoInfoRequest(BaseModel):
    # Repository name; passed to the service's repo_query as the repo argument.
    repo: str
    # Optional owner; the endpoint falls back to GITHUB_ORG when it is omitted.
    owner: Optional[str] = None

# Response envelope returned by POST /github_support.
class RepoInfoResponse(BaseModel):
    # "success" or "error", as set by the /github_support handler.
    status: str
    # Repository details on success; an empty dict on error.
    data: dict
    # Declared Optional so the type matches the None default and an explicit
    # error=None validates; populated only when status is "error".
    error: Optional[str] = None

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Fixed payload: the handler performs no checks of its own.
    liveness = {"status": "healthy", "service": "github-mcp"}
    return liveness

# Request body for POST /list_org_repos.
class OrgInfoRequest(BaseModel):
    # Organization name; passed to the service's list_org_repos.
    org: str

@app.post("/list_org_repos")
async def list_org_repos(request: OrgInfoRequest):
    """List the repositories of an organization"""
    # Checked outside the try block so this HTTPException reaches the client
    # as raised, instead of being caught below and re-wrapped as a 500 whose
    # detail is the stringified exception. 503 matches /github_support.
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    try:
        # The service method is called synchronously, so run it in a worker
        # thread to keep the event loop free (same approach as /github_support).
        result = await asyncio.to_thread(github_service.list_org_repos, request.org)
    except Exception as e:
        logger.exception("Error listing org repos")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Only a dict can carry an "error" key. Without the isinstance check a
    # list result containing the string "error" would be mistaken for a
    # failure and then crash on result["error"].
    if isinstance(result, dict) and "error" in result:
        return {"status": "error", "data": {}, "error": result["error"]}

    return {"status": "success", "data": result}

@app.post("/github_support")
async def get_github_supp(request: RepoInfoRequest):
    """Get repo details, using fixed org from env"""
    # Guard 1: the shared service must have been constructed at import time.
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    # Guard 2: an owner is required; the request value wins over GITHUB_ORG.
    resolved_owner = request.owner if request.owner else GITHUB_ORG
    if not resolved_owner:
        raise HTTPException(status_code=400, detail="Missing owner; provide 'owner' or set GITHUB_ORG")

    try:
        # repo_query runs in a worker thread so the event loop is not blocked.
        repo_data = await asyncio.to_thread(
            github_service.repo_query,
            resolved_owner,
            request.repo,
        )
        if "error" in repo_data:
            reply = RepoInfoResponse(status="error", data={}, error=repo_data["error"])
        else:
            reply = RepoInfoResponse(status="success", data=repo_data)
        return reply
    except Exception as exc:
        # Any failure in the lookup or in building the reply becomes a 500.
        logger.exception("Error getting repo info")
        raise HTTPException(status_code=500, detail=str(exc))

# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # Imported here, so uvicorn is not needed just to import this module.
    import uvicorn
    # Listen on all interfaces, port 8001.
    uvicorn.run(app, host="0.0.0.0", port=8001)
Loading