diff --git a/backend/examples/cli_research.py b/backend/examples/cli_research.py
index a086496b..fb83fe30 100644
--- a/backend/examples/cli_research.py
+++ b/backend/examples/cli_research.py
@@ -1,43 +1,44 @@
 import argparse
-from langchain_core.messages import HumanMessage
+import asyncio
+from dotenv import load_dotenv
 from agent.graph import graph

+load_dotenv()

-def main() -> None:
-    """Run the research agent from the command line."""
-    parser = argparse.ArgumentParser(description="Run the LangGraph research agent")
-    parser.add_argument("question", help="Research question")
+async def main() -> None:
+    """Run the local research agent from the command line."""
+    parser = argparse.ArgumentParser(description="Local Research CLI")
+    parser.add_argument("topic", help="The topic to research")
     parser.add_argument(
-        "--initial-queries",
-        type=int,
-        default=3,
-        help="Number of initial search queries",
-    )
-    parser.add_argument(
-        "--max-loops",
-        type=int,
-        default=2,
-        help="Maximum number of research loops",
-    )
-    parser.add_argument(
-        "--reasoning-model",
-        default="gemini-2.5-pro-preview-05-06",
-        help="Model for the final answer",
+        "--dir",
+        required=True,
+        help="Path to the local directory containing .md files for research",
     )
     args = parser.parse_args()

-    state = {
-        "messages": [HumanMessage(content=args.question)],
-        "initial_search_query_count": args.initial_queries,
-        "max_research_loops": args.max_loops,
-        "reasoning_model": args.reasoning_model,
+    # Configure the graph with the local search directory
+    config = {
+        "configurable": {
+            "search_dir": args.dir,
+            "max_research_loops": 2,  # two loops is usually enough for documentation analysis
+        }
     }

-    result = graph.invoke(state)
-    messages = result.get("messages", [])
-    if messages:
-        print(messages[-1].content)
+    # Initialize the research with the user's topic
+    inputs = {"messages": [("user", args.topic)]}
+
+    print(f"Starting research in: {args.dir}")
+    print("-" * 30)
+    # Stream full state snapshots and print each new message exactly once
+    printed = 0
+    async for event in graph.astream(inputs, config=config, stream_mode="values"):
+        messages = event.get("messages") or []
+        for message in messages[printed:]:
+            print(getattr(message, "content", message))
+        printed = len(messages)


 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 09eb5988..cca2c566 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -11,13 +11,12 @@ requires-python = ">=3.11,<4.0"
 dependencies = [
     "langgraph>=0.2.6",
     "langchain>=0.3.19",
-    "langchain-google-genai",
+    "langchain-groq",
     "python-dotenv>=1.0.1",
     "langgraph-sdk>=0.1.57",
     "langgraph-cli",
     "langgraph-api",
     "fastapi",
-    "google-genai",
 ]
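A quick way to validate the dependency swap above before touching the graph code is a one-off call through `langchain-groq` — a minimal sketch, assuming `GROQ_API_KEY` is exported and using the model name configured in `graph.py` below:

```python
# Smoke test for the new langchain-groq dependency (assumes GROQ_API_KEY is set).
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
print(llm.invoke("Reply with the single word: ok").content)
```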
diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py
index 0f19c3f2..66437570 100644
--- a/backend/src/agent/graph.py
+++ b/backend/src/agent/graph.py
@@ -1,13 +1,11 @@
 import os
-
 from agent.tools_and_schemas import SearchQueryList, Reflection
 from dotenv import load_dotenv
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, SystemMessage
 from langgraph.types import Send
-from langgraph.graph import StateGraph
-from langgraph.graph import START, END
+from langgraph.graph import StateGraph, START, END
 from langchain_core.runnables import RunnableConfig
-from google.genai import Client
+from langchain_groq import ChatGroq

 from agent.state import (
     OverallState,
@@ -19,156 +17,94 @@ from agent.prompts import (
     get_current_date,
     query_writer_instructions,
-    web_searcher_instructions,
     reflection_instructions,
     answer_instructions,
 )
-from langchain_google_genai import ChatGoogleGenerativeAI
-from agent.utils import (
-    get_citations,
-    get_research_topic,
-    insert_citation_markers,
-    resolve_urls,
-)
+from agent.utils import get_research_topic

 load_dotenv()

-if os.getenv("GEMINI_API_KEY") is None:
-    raise ValueError("GEMINI_API_KEY is not set")
-
-# Used for Google Search API
-genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
-
+if os.getenv("GROQ_API_KEY") is None:
+    raise ValueError("GROQ_API_KEY is not set")
+
+# Initialize Groq LLM with Llama 3.3 70B
+llm = ChatGroq(
+    model="llama-3.3-70b-versatile",
+    temperature=0,
+    api_key=os.getenv("GROQ_API_KEY")
+)

-# Nodes
 def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
-    """LangGraph node that generates search queries based on the User's question.
-
-    Uses Gemini 2.0 Flash to create an optimized search queries for web research based on
-    the User's question.
-
-    Args:
-        state: Current graph state containing the User's question
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated queries
-    """
+    """Generate search queries based on the user question."""
     configurable = Configuration.from_runnable_config(config)
-
-    # check for custom initial search query count
+
     if state.get("initial_search_query_count") is None:
         state["initial_search_query_count"] = configurable.number_of_initial_queries

-    # init Gemini 2.0 Flash
-    llm = ChatGoogleGenerativeAI(
-        model=configurable.query_generator_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
     structured_llm = llm.with_structured_output(SearchQueryList)
-
-    # Format the prompt
-    current_date = get_current_date()
+
     formatted_prompt = query_writer_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         number_queries=state["initial_search_query_count"],
     )
-    # Generate the search queries
+
     result = structured_llm.invoke(formatted_prompt)
     return {"search_query": result.query}

-
-def continue_to_web_research(state: QueryGenerationState):
-    """LangGraph node that sends the search queries to the web research node.
-
-    This is used to spawn n number of web research nodes, one for each search query.
-    """
+def continue_to_local_research(state: QueryGenerationState):
+    """Route to parallel local search nodes."""
     return [
-        Send("web_research", {"search_query": search_query, "id": int(idx)})
-        for idx, search_query in enumerate(state["search_query"])
+        Send("local_research", {"search_query": q, "id": int(i)})
+        for i, q in enumerate(state["search_query"])
    ]

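The router above fans the generated queries out with LangGraph's `Send`, one `local_research` invocation per query. A minimal sketch of what it returns, using a hypothetical state:

```python
# Hypothetical fan-out illustration; mirrors continue_to_local_research above.
from langgraph.types import Send

state = {"search_query": ["interrupt", "StateGraph checkpointer"]}
sends = [
    Send("local_research", {"search_query": q, "id": int(i)})
    for i, q in enumerate(state["search_query"])
]
print(sends)  # one Send packet per query, each routed to the local_research node
```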
-def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
-    """LangGraph node that performs web research using the native Google Search API tool.
-
-    Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash.
-
-    Args:
-        state: Current graph state containing the search query and research loop count
-        config: Configuration for the runnable, including search API settings
-
-    Returns:
-        Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
-    """
-    # Configure
-    configurable = Configuration.from_runnable_config(config)
-    formatted_prompt = web_searcher_instructions.format(
-        current_date=get_current_date(),
-        research_topic=state["search_query"],
-    )
-
-    # Uses the google genai client as the langchain client doesn't return grounding metadata
-    response = genai_client.models.generate_content(
-        model=configurable.query_generator_model,
-        contents=formatted_prompt,
-        config={
-            "tools": [{"google_search": {}}],
-            "temperature": 0,
-        },
-    )
-    # resolve the urls to short urls for saving tokens and time
-    resolved_urls = resolve_urls(
-        response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
-    )
-    # Gets the citations and adds them to the generated text
-    citations = get_citations(response, resolved_urls)
-    modified_text = insert_citation_markers(response.text, citations)
-    sources_gathered = [item for citation in citations for item in citation["segments"]]
+def local_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
+    """Search for keywords within local markdown documentation."""
+    search_dir = config.get("configurable", {}).get("search_dir")
+
+    if not search_dir:
+        return {"messages": [SystemMessage(content="Error: Search directory not provided.")]}
+
+    results = []
+    # Split the query into keywords for flexible matching
+    query_keywords = state.get("search_query", "").lower().split()
+    # Short queries must match every keyword; longer ones need at least two hits
+    required_matches = min(2, len(query_keywords))
+
+    try:
+        for root, _, files in os.walk(search_dir):
+            for file in files:
+                # Target .md files for documentation research
+                if file.endswith(".md"):
+                    path = os.path.join(root, file)
+                    try:
+                        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
+                            content = f.read()
+                        content_lower = content.lower()
+                        match_count = sum(1 for word in query_keywords if word in content_lower)
+                        if query_keywords and match_count >= required_matches:
+                            # Keep the first 2000 characters so enough surrounding code survives
+                            results.append(f"Source: {path}\nContent: {content[:2000]}\n")
+                    except OSError:
+                        # Skip unreadable files rather than swallowing every exception
+                        continue
+    except Exception as e:
+        return {"messages": [SystemMessage(content=f"FileSystem Error: {str(e)}")]}
+
+    final_content = "\n".join(results) if results else "No relevant information found."

     return {
-        "sources_gathered": sources_gathered,
+        "sources_gathered": [],
         "search_query": [state["search_query"]],
-        "web_research_result": [modified_text],
+        "web_research_result": [final_content],
     }

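The matching rule in `local_research` is deliberately loose: a file qualifies when it contains at least two of the query's keywords, or all of them for a one-word query. A self-contained check of that logic, with hypothetical inputs:

```python
# Hypothetical inputs exercising the matching rule from local_research.
query_keywords = "functional api interrupt".lower().split()
content_lower = "using interrupt() inside the functional api".lower()

required_matches = min(2, len(query_keywords))
match_count = sum(1 for word in query_keywords if word in content_lower)
assert match_count >= required_matches  # this file would be included in the results
```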
-
 def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
-    """LangGraph node that identifies knowledge gaps and generates potential follow-up queries.
-
-    Analyzes the current summary to identify areas for further research and generates
-    potential follow-up queries. Uses structured output to extract
-    the follow-up query in JSON format.
-
-    Args:
-        state: Current graph state containing the running summary and research topic
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated follow-up query
-    """
-    configurable = Configuration.from_runnable_config(config)
-    # Increment the research loop count and get the reasoning model
+    """Analyze search results and identify knowledge gaps."""
     state["research_loop_count"] = state.get("research_loop_count", 0) + 1
-    reasoning_model = state.get("reasoning_model", configurable.reflection_model)

-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = reflection_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         summaries="\n\n---\n\n".join(state["web_research_result"]),
     )
-    # init Reasoning Model
-    llm = ChatGoogleGenerativeAI(
-        model=reasoning_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
+
     result = llm.with_structured_output(Reflection).invoke(formatted_prompt)

     return {
@@ -179,115 +115,45 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
         "number_of_ran_queries": len(state["search_query"]),
     }

-
-def evaluate_research(
-    state: ReflectionState,
-    config: RunnableConfig,
-) -> OverallState:
-    """LangGraph routing function that determines the next step in the research flow.
-
-    Controls the research loop by deciding whether to continue gathering information
-    or to finalize the summary based on the configured maximum number of research loops.
-
-    Args:
-        state: Current graph state containing the research loop count
-        config: Configuration for the runnable, including max_research_loops setting
-
-    Returns:
-        String literal indicating the next node to visit ("web_research" or "finalize_summary")
-    """
+def evaluate_research(state: ReflectionState, config: RunnableConfig):
+    """Determine whether to continue research or finalize the answer."""
     configurable = Configuration.from_runnable_config(config)
-    max_research_loops = (
-        state.get("max_research_loops")
-        if state.get("max_research_loops") is not None
-        else configurable.max_research_loops
-    )
-    if state["is_sufficient"] or state["research_loop_count"] >= max_research_loops:
+    # Fall back to the configured maximum even when the state key is present but None
+    max_loops = state.get("max_research_loops") or configurable.max_research_loops
+
+    if state["is_sufficient"] or state["research_loop_count"] >= max_loops:
         return "finalize_answer"
-    else:
-        return [
-            Send(
-                "web_research",
-                {
-                    "search_query": follow_up_query,
-                    "id": state["number_of_ran_queries"] + int(idx),
-                },
-            )
-            for idx, follow_up_query in enumerate(state["follow_up_queries"])
-        ]
-
+
+    return [
+        Send("local_research", {
+            "search_query": q,
+            "id": state["number_of_ran_queries"] + i
+        })
+        for i, q in enumerate(state["follow_up_queries"])
+    ]

 def finalize_answer(state: OverallState, config: RunnableConfig):
-    """LangGraph node that finalizes the research summary.
-
-    Prepares the final output by deduplicating and formatting sources, then
-    combining them with the running summary to create a well-structured
-    research report with proper citations.
- - Args: - state: Current graph state containing the running summary and sources gathered - - Returns: - Dictionary with state update, including running_summary key containing the formatted final summary with sources - """ - configurable = Configuration.from_runnable_config(config) - reasoning_model = state.get("reasoning_model") or configurable.answer_model - - # Format the prompt - current_date = get_current_date() + """Generate final response based on all gathered information.""" formatted_prompt = answer_instructions.format( - current_date=current_date, + current_date=get_current_date(), research_topic=get_research_topic(state["messages"]), summaries="\n---\n\n".join(state["web_research_result"]), ) - # init Reasoning Model, default to Gemini 2.5 Flash - llm = ChatGoogleGenerativeAI( - model=reasoning_model, - temperature=0, - max_retries=2, - api_key=os.getenv("GEMINI_API_KEY"), - ) result = llm.invoke(formatted_prompt) + return {"messages": [AIMessage(content=result.content)]} - # Replace the short urls with the original urls and add all used urls to the sources_gathered - unique_sources = [] - for source in state["sources_gathered"]: - if source["short_url"] in result.content: - result.content = result.content.replace( - source["short_url"], source["value"] - ) - unique_sources.append(source) - - return { - "messages": [AIMessage(content=result.content)], - "sources_gathered": unique_sources, - } - - -# Create our Agent Graph +# Build the StateGraph builder = StateGraph(OverallState, config_schema=Configuration) -# Define the nodes we will cycle between builder.add_node("generate_query", generate_query) -builder.add_node("web_research", web_research) +builder.add_node("local_research", local_research) builder.add_node("reflection", reflection) builder.add_node("finalize_answer", finalize_answer) -# Set the entrypoint as `generate_query` -# This means that this node is the first one called builder.add_edge(START, "generate_query") -# Add conditional edge to continue with search queries in a parallel branch -builder.add_conditional_edges( - "generate_query", continue_to_web_research, ["web_research"] -) -# Reflect on the web research -builder.add_edge("web_research", "reflection") -# Evaluate the research -builder.add_conditional_edges( - "reflection", evaluate_research, ["web_research", "finalize_answer"] -) -# Finalize the answer +builder.add_conditional_edges("generate_query", continue_to_local_research, ["local_research"]) +builder.add_edge("local_research", "reflection") +builder.add_conditional_edges("reflection", evaluate_research, ["local_research", "finalize_answer"]) builder.add_edge("finalize_answer", END) -graph = builder.compile(name="pro-search-agent") +graph = builder.compile(name="pro-search-agent") \ No newline at end of file diff --git a/backend/src/agent/prompts.py b/backend/src/agent/prompts.py index 8963f6a6..2cc0a27a 100644 --- a/backend/src/agent/prompts.py +++ b/backend/src/agent/prompts.py @@ -1,96 +1,27 @@ from datetime import datetime - # Get current date in a readable format def get_current_date(): return datetime.now().strftime("%B %d, %Y") - -query_writer_instructions = """Your goal is to generate sophisticated and diverse web search queries. These queries are intended for an advanced automated web research tool capable of analyzing complex results, following links, and synthesizing information. +query_writer_instructions = """Your goal is to generate technical keywords and short phrases to search within local documentation files (.md). 
+Instead of broad web queries, focus on specific terms, function names, class names, or API concepts that are likely to appear in the documentation. Instructions: -- Always prefer a single search query, only add another query if the original question requests multiple aspects or elements and one query is not enough. -- Each query should focus on one specific aspect of the original question. +- Generate keywords or very short technical phrases (e.g., 'interrupt', 'StateGraph', 'entrypoint', 'asm_graph'). - Don't produce more than {number_queries} queries. -- Queries should be diverse, if the topic is broad, generate more than 1 query. -- Don't generate multiple similar queries, 1 is enough. -- Query should ensure that the most current information is gathered. The current date is {current_date}. +- Each query should be a single term or a 2-3 word phrase that matches technical content. +- Avoid natural language questions; use technical vocabulary relevant to the topic. Format: -- Format your response as a JSON object with ALL two of these exact keys: - - "rationale": Brief explanation of why these queries are relevant - - "query": A list of search queries - -Example: - -Topic: What revenue grew more last year apple stock or the number of people buying an iphone -```json -{{ - "rationale": "To answer this comparative growth question accurately, we need specific data points on Apple's stock performance and iPhone sales metrics. These queries target the precise financial information needed: company revenue trends, product-specific unit sales figures, and stock price movement over the same fiscal period for direct comparison.", - "query": ["Apple total revenue growth fiscal year 2024", "iPhone unit sales growth fiscal year 2024", "Apple stock price growth fiscal year 2024"], -}} -``` - -Context: {research_topic}""" - - -web_searcher_instructions = """Conduct targeted Google Searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact. - -Instructions: -- Query should ensure that the most current information is gathered. The current date is {current_date}. -- Conduct multiple, diverse searches to gather comprehensive information. -- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information. -- The output should be a well-written summary or report based on your search findings. -- Only include the information found in the search results, don't make up any information. - -Research Topic: -{research_topic} -""" - -reflection_instructions = """You are an expert research assistant analyzing summaries about "{research_topic}". - -Instructions: -- Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple). -- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query. -- If there is a knowledge gap, generate a follow-up query that would help expand your understanding. -- Focus on technical details, implementation specifics, or emerging trends that weren't fully covered. - -Requirements: -- Ensure the follow-up query is self-contained and includes necessary context for web search. 
-
-Output Format:
-   Format your response as a JSON object with these exact keys:
-   - "is_sufficient": true or false
-   - "knowledge_gap": Describe what information is missing or needs clarification
-   - "follow_up_queries": Write a specific question to address this gap
+    - "rationale": Brief explanation of why these keywords are relevant
+    - "query": A list of search terms/keywords

 Example:
+Topic: How to use interrupts in Functional API
 ```json
 {{
-    "is_sufficient": true, // or false
-    "knowledge_gap": "The summary lacks information about performance metrics and benchmarks", // "" if is_sufficient is true
-    "follow_up_queries": ["What are typical performance benchmarks and metrics used to evaluate [specific technology]?"] // [] if is_sufficient is true
-}}
-```
-
-Reflect carefully on the Summaries to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:
-
-Summaries:
-{summaries}
-"""
-
-answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
-
-Instructions:
-- The current date is {current_date}.
-- You are the final step of a multi-step research process, don't mention that you are the final step.
-- You have access to all the information gathered from the previous steps.
-- You have access to the user's question.
-- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
-- Include the sources you used from the Summaries in the answer correctly, use markdown format (e.g. [apnews](https://vertexaisearch.cloud.google.com/id/1-0)). THIS IS A MUST.
-
-User Context:
-- {research_topic}
-
-Summaries:
-{summaries}"""
+    "rationale": "I am looking for technical implementation of interrupts specifically within the functional approach of the framework.",
+    "query": ["interrupt", "functional_api", "@entrypoint", "human-in-the-loop"]
+}}
+```
+
+Context: {research_topic}"""
+
+
+# reflection_instructions and answer_instructions are still imported by graph.py,
+# so local-search variants are kept below in place of the deleted web versions.
+reflection_instructions = """You are an expert research assistant analyzing summaries about "{research_topic}".

+Instructions:
+- Identify knowledge gaps or areas that need deeper exploration and generate follow-up queries (1 or multiple).
+- If the provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
+- Follow-up queries must use the same keyword style used for local documentation search.
+
+Output Format:
+- Format your response as a JSON object with these exact keys:
+   - "is_sufficient": true or false
+   - "knowledge_gap": Describe what information is missing or needs clarification
+   - "follow_up_queries": A list of search terms to address this gap
+
+Summaries:
+{summaries}
+"""
+
+answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
+
+Instructions:
+- The current date is {current_date}.
+- You are the final step of a multi-step research process, don't mention that you are the final step.
+- Generate a high-quality answer to the user's question based on the provided summaries.
+- Cite the source file paths from the Summaries (the "Source:" lines) where relevant.
+
+User Context:
+- {research_topic}
+
+Summaries:
+{summaries}"""
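With the prompts file completed, the loop can be exercised end to end without the CLI. A minimal driver sketch, assuming `backend/src` is on `PYTHONPATH`, `GROQ_API_KEY` is set, and `docs/` is a hypothetical directory of `.md` files:

```python
# End-to-end sketch (hypothetical paths); mirrors what cli_research.py does.
import asyncio
from agent.graph import graph

async def demo() -> None:
    config = {"configurable": {"search_dir": "docs/", "max_research_loops": 2}}
    inputs = {"messages": [("user", "How do interrupts work in the Functional API?")]}
    result = await graph.ainvoke(inputs, config=config)
    print(result["messages"][-1].content)

asyncio.run(demo())
```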