AOSSIE-Org · smokeyScraper · Sep 29, 2025 · Aug 24, 2025 · Aug 26, 2025 · Aug 26, 2025
diff --git a/backend/.env.example b/backend/.env.example
@@ -9,6 +9,8 @@ DISCORD_BOT_TOKEN=
 # ENABLE_DISCORD_BOT=true
 
 GITHUB_TOKEN=
+# Default GitHub organization (owner) used when a request does not specify one
+GITHUB_ORG=
 
 # EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
 # EMBEDDING_MAX_BATCH_SIZE=32

diff --git a/backend/app/agents/devrel/github/github_toolkit.py b/backend/app/agents/devrel/github/github_toolkit.py
@@ -1,18 +1,28 @@
 import logging
+import os
+import json
+import re
 from typing import Dict, Any
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_core.messages import HumanMessage
 from app.core.config import settings
 from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
 from .tools.search import handle_web_search
-# TODO: Implement all tools
+from .tools.github_support import handle_github_supp
 from .tools.contributor_recommendation import handle_contributor_recommendation
-# from .tools.repository_query import handle_repo_query
-# from .tools.issue_creation import handle_issue_creation
-# from .tools.documentation_generation import handle_documentation_generation
 from .tools.general_github_help import handle_general_github_help
+
 logger = logging.getLogger(__name__)
 
+DEFAULT_ORG = os.getenv("GITHUB_ORG")
+
+
+def normalize_org(org_from_user: "str | None" = None) -> "str | None":
+    """Return the stripped user-supplied org, or DEFAULT_ORG (None if GITHUB_ORG is unset)."""
+    org = (org_from_user or "").strip()
+    # Blank or missing input falls back to the environment default.
+    return org or DEFAULT_ORG
+
 
 class GitHubToolkit:
     """
@@ -32,30 +42,37 @@ def __init__(self):
             "web_search",
             "contributor_recommendation",
             "repo_support",
+            "github_support",
             "issue_creation",
             "documentation_generation",
             "find_good_first_issues",
             "general_github_help"
         ]
 
     async def classify_intent(self, user_query: str) -> Dict[str, Any]:
-        """
-        Classify intent and return classification with reasoning.
-
-        Args:
-            user_query: The user's request or question
-
-        Returns:
-            Dictionary containing classification, reasoning, and confidence
-        """
+        """Classify intent and return classification with reasoning."""
         logger.info(f"Classifying intent for query: {user_query[:100]}")
 
         try:
             prompt = GITHUB_INTENT_ANALYSIS_PROMPT.format(user_query=user_query)
             response = await self.llm.ainvoke([HumanMessage(content=prompt)])
 
-            import json
-            result = json.loads(response.content.strip())
+            content = response.content.strip()
+
+            try:
+                result = json.loads(content)
+            except json.JSONDecodeError:
+                match = re.search(r"\{.*\}", content, re.DOTALL)
+                if match:
+                    result = json.loads(match.group())
+                else:
+                    logger.error(f"Invalid JSON in LLM response: {content}")
+                    return {
+                        "classification": "general_github_help",
+                        "reasoning": "Failed to parse LLM response as JSON",
+                        "confidence": "low",
+                        "query": user_query
+                    }
 
             classification = result.get("classification")
             if classification not in self.tools:
@@ -65,21 +82,12 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
 
             result["query"] = user_query
 
-            logger.info(f"Classified intent as for query: {user_query} is: {classification}")
+            logger.info(f"Classified intent for query: {user_query} -> {classification}")
             logger.info(f"Reasoning: {result.get('reasoning', 'No reasoning provided')}")
             logger.info(f"Confidence: {result.get('confidence', 'unknown')}")
 
             return result
 
-        except json.JSONDecodeError as e:
-            logger.error(f"Error parsing JSON response from LLM: {str(e)}")
-            logger.error(f"Raw response: {response.content}")
-            return {
-                "classification": "general_github_help",
-                "reasoning": f"Failed to parse LLM response: {str(e)}",
-                "confidence": "low",
-                "query": user_query
-            }
         except Exception as e:
             logger.error(f"Error in intent classification: {str(e)}")
             return {
@@ -90,9 +98,7 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
             }
 
     async def execute(self, query: str) -> Dict[str, Any]:
-        """
-        Main execution method - classifies intent and delegates to appropriate tools
-        """
+        """Main execution method - classifies intent and delegates to appropriate tools"""
         logger.info(f"Executing GitHub toolkit for query: {query[:100]}")
 
         try:
@@ -103,15 +109,16 @@ async def execute(self, query: str) -> Dict[str, Any]:
 
             if classification == "contributor_recommendation":
                 result = await handle_contributor_recommendation(query)
+            elif classification == "github_support":
+                org = normalize_org()
+                result = await handle_github_supp(query, org=org)
+                result["org_used"] = org
             elif classification == "repo_support":
                 result = "Not implemented"
-                # result = await handle_repo_query(query)
             elif classification == "issue_creation":
                 result = "Not implemented"
-                # result = await handle_issue_creation(query)
             elif classification == "documentation_generation":
                 result = "Not implemented"
-                # result = await handle_documentation_generation(query)
             elif classification == "web_search":
                 result = await handle_web_search(query)
             else:

diff --git a/backend/app/agents/devrel/github/prompts/intent_analysis.py b/backend/app/agents/devrel/github/prompts/intent_analysis.py
@@ -1,8 +1,9 @@
 GITHUB_INTENT_ANALYSIS_PROMPT = """You are an expert GitHub DevRel AI assistant. Analyze the user query and classify the intent.
 
 AVAILABLE FUNCTIONS:
-- web_search: Search the web for information  
-- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
+- github_support: Questions about repository information, structure, stats, issues, stars, forks, description, or any repository metadata
+- web_search: Search the web for general information
+- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
 - repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
 - issue_creation: Creating bug reports, feature requests, or tracking items
 - documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
@@ -12,24 +13,37 @@
 USER QUERY: {user_query}
 
 Classification guidelines:
+- github_support: 
+  - ALWAYS classify as `github_support` if the query asks about:
+    - repository information
+    - stats (stars, forks, watchers, issues)
+    - open issues, closed issues, or "what issues"
+    - description, license, URL, metadata
+    - any question containing "<repo> repo", "repository", "repo", "issues in", "stars in", "forks in"
+  - Example queries:
+    - "What all issues are in Dev.ai repo?" → github_support
+    - "How many stars does Devr.AI repo have?" → github_support
+    - "Show me forks of Aossie-org/Dev.ai" → github_support
 - contributor_recommendation: 
   * "who should review this PR/issue?"
   * "find experts in React/Python/ML"
   * "recommend assignees for stripe integration"
   * "best people for database optimization"
   * URLs like github.com/owner/repo/issues/123
   * "I need help with RabbitMQ, can you suggest some people?"
-- repo_support: Code structure, dependencies, impact analysis, architecture  
+- repo_support: Code structure, dependencies, impact analysis, architecture
 - issue_creation: Creating bugs, features, tracking items
 - documentation_generation: Docs, READMEs, guides, explanations
 - find_good_first_issues: Beginners, newcomers, "good first issue"
-- web_search: General information needing external search
+- web_search: Only for information that cannot be found through GitHub API (like news, articles, external documentation)
 - general_github_help: General GitHub questions not covered above
 
+IMPORTANT: Repository information queries (issues count, stars, forks, description) should ALWAYS use github_support, not web_search.
+
 CRITICAL: Return ONLY raw JSON. No markdown, no code blocks, no explanation text.
 
 {{
   "classification": "function_name_from_list_above",
   "reasoning": "Brief explanation of why you chose this function",
   "confidence": "high|medium|low"
-}}"""
+}}"""
diff --git a/backend/app/agents/devrel/github/services/github_mcp_client.py b/backend/app/agents/devrel/github/services/github_mcp_client.py
@@ -0,0 +1,101 @@
+import logging
+import os
+from typing import Dict, Any, Optional, List, Union
+import aiohttp
+import asyncio
+
+logger = logging.getLogger(__name__)
+
+class GitHubMCPClient:
+    """Async HTTP client for communicating with the GitHub MCP server.
+
+    Use it as an async context manager: the aiohttp session is created in
+    ``__aenter__`` and closed in ``__aexit__``. The request methods raise
+    ``RuntimeError`` when no session exists; otherwise they return the
+    server's ``data`` payload on success or ``{"error": "..."}`` on failure.
+    """
+
+    def __init__(self, mcp_server_url: str = "http://localhost:8001"):
+        self.mcp_server_url = mcp_server_url
+        self.session: Optional[aiohttp.ClientSession] = None
+        # Default org pulled from environment
+        self.org = os.getenv("GITHUB_ORG", "Aossie-org")
+
+    async def __aenter__(self):
+        # Requests made through this session share a 15 second total timeout.
+        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15))
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        # Close the session if one was opened.
+        if self.session:
+            await self.session.close()
+
+    async def _post(self, endpoint: str, payload: Dict[str, Any], default: Any) -> Any:
+        """POST ``payload`` to ``endpoint`` and unwrap the server's response envelope.
+
+        Returns the envelope's ``data`` (or ``default`` when the key is absent)
+        if the server reports ``status == "success"``; otherwise an
+        ``{"error": ...}`` dict. Transport and unexpected errors are logged and
+        reported the same way instead of being raised.
+        """
+        if not self.session:
+            raise RuntimeError("Client not initialized. Use async context manager.")
+
+        try:
+            async with self.session.post(
+                f"{self.mcp_server_url}/{endpoint}",
+                json=payload,
+                headers={"Content-Type": "application/json"},
+            ) as response:
+                if response.status != 200:
+                    logger.error(f"MCP server error: {response.status}")
+                    return {"error": f"MCP server error: {response.status}"}
+
+                result = await response.json()
+                if result.get("status") == "success":
+                    return result.get("data", default)
+                return {"error": result.get("error", "Unknown error")}
+
+        except aiohttp.ClientError as e:
+            logger.exception("Error communicating with MCP server: %s", e)
+            return {"error": f"Communication error: {str(e)}"}
+        except Exception as e:
+            logger.exception("Unexpected error: %s", e)
+            return {"error": f"Unexpected error: {str(e)}"}
+
+    async def get_github_supp(self, repo: str, owner: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Fetch metadata for a single repository.
+        Owner defaults to org from environment if not provided.
+        """
+        payload = {"owner": owner or self.org, "repo": repo}
+        return await self._post("github_support", payload, {})
+
+    async def list_org_repos(self, org: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
+        """List the repositories of ``org``; a list on success, an error dict on failure."""
+        return await self._post("list_org_repos", {"org": org}, [])
+
+    async def is_server_available(self) -> bool:
+        """Health check for MCP server."""
+        if not self.session:
+            return False
+
+        try:
+            # Pass a ClientTimeout object: aiohttp deprecates bare numeric timeouts.
+            async with self.session.get(
+                f"{self.mcp_server_url}/health",
+                timeout=aiohttp.ClientTimeout(total=5),
+            ) as response:
+                return response.status == 200
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            logger.debug(f"Health check failed: {e}")
+            return False
diff --git a/backend/app/agents/devrel/github/services/github_mcp_server.py b/backend/app/agents/devrel/github/services/github_mcp_server.py
@@ -0,0 +1,91 @@
+import os
+import logging
+import asyncio
+from dotenv import load_dotenv, find_dotenv
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from .github_mcp_service import GitHubMCPService
+from typing import Optional
+
+# Locate a .env file with find_dotenv(usecwd=True); when none is found,
+# fall back to load_dotenv()'s own default lookup.
+dotenv_path = find_dotenv(usecwd=True)
+if dotenv_path:
+    load_dotenv(dotenv_path=dotenv_path)
+else:
+    load_dotenv()
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="GitHub MCP Server", version="1.0.0")
+
+# Load env vars
+# GITHUB_ORG is the fallback owner for /github_support requests that omit "owner".
+GITHUB_ORG = os.getenv("GITHUB_ORG")
+if not GITHUB_ORG:
+    logger.warning("GITHUB_ORG not set in .env — defaulting to manual owner input")
+
+# Stays None when construction fails; the endpoints check for that before calling it.
+github_service: Optional[GitHubMCPService] = None
+try:
+    # Either variable name is accepted; a missing token only triggers a warning.
+    token = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN")
+    if not token:
+        logger.warning("GITHUB_TOKEN/GH_TOKEN not set; GitHub API calls may be rate-limited or fail.")
+    github_service = GitHubMCPService(token=token)
+    logger.info("GitHub service initialized successfully")
+except Exception as e:
+    logger.exception("Failed to initialize GitHub service")
+    github_service = None
+
+class RepoInfoRequest(BaseModel):
+    """Request body for the /github_support endpoint."""
+    # Repository name; the owner is passed separately.
+    repo: str 
+    # Optional owner/org; the endpoint falls back to GITHUB_ORG when omitted.
+    owner: Optional[str] = None
+
+class RepoInfoResponse(BaseModel):
+    """Response envelope returned by the /github_support endpoint."""
+    # "success" or "error"
+    status: str
+    # Repository payload on success, empty dict on error
+    data: dict
+    # Error message; None on success, so the annotation must allow None
+    error: Optional[str] = None
+
+@app.get("/health")
+async def health_check():
+    """Liveness probe: report that the github-mcp service is running."""
+    payload = {"status": "healthy", "service": "github-mcp"}
+    return payload
+
+class OrgInfoRequest(BaseModel):
+    """Request body for the /list_org_repos endpoint."""
+    # Name of the organization whose repositories are listed.
+    org: str
+
+@app.post("/list_org_repos")
+async def list_org_repos(request: OrgInfoRequest):
+    """List repositories of ``request.org``; 503 if the GitHub service is unavailable."""
+    # Checked outside the try so the 503 is not caught and rewrapped as a 500 below.
+    if not github_service:
+        raise HTTPException(status_code=503, detail="GitHub service not available")
+
+    try:
+        result = await asyncio.to_thread(github_service.list_org_repos, request.org)
+        # Only a dict can carry an "error" key; a non-dict result is passed through as data.
+        if isinstance(result, dict) and "error" in result:
+            return {"status": "error", "data": {}, "error": result["error"]}
+        return {"status": "success", "data": result}
+    except Exception as e:
+        logger.exception("Error listing org repos")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/github_support")
+async def get_github_supp(request: RepoInfoRequest):
+    """Return details for ``request.repo``.
+
+    The owner comes from the request and falls back to GITHUB_ORG. Responds
+    503 when the GitHub service is unavailable, 400 when no owner can be
+    resolved, and 500 when the lookup raises.
+    """
+    if not github_service:
+        raise HTTPException(status_code=503, detail="GitHub service not available")
+
+    repo_owner = request.owner or GITHUB_ORG
+    if not repo_owner:
+        raise HTTPException(status_code=400, detail="Missing owner; provide 'owner' or set GITHUB_ORG")
+
+    try:
+        # repo_query is called via to_thread so it runs off the event loop.
+        repo_data = await asyncio.to_thread(github_service.repo_query, repo_owner, request.repo)
+        if "error" not in repo_data:
+            return RepoInfoResponse(status="success", data=repo_data)
+        return RepoInfoResponse(status="error", data={}, error=repo_data["error"])
+    except Exception as e:
+        logger.exception("Error getting repo info")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+if __name__ == "__main__":
+    # Direct-run entry point: uvicorn is imported here so it is only required
+    # when the module is executed as a script. Serves on all interfaces, port 8001.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8001)