diff --git a/CommitHunter/Version1.ipynb b/CommitHunter/Version1.ipynb new file mode 100644 index 00000000..08505468 --- /dev/null +++ b/CommitHunter/Version1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NW7h6VD2xC6a" + }, + "outputs": [], + "source": [ + "!pip install -q langchain together openai faiss-cpu tiktoken\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NtPGB4JfxQqt" + }, + "outputs": [], + "source": [ + "import os\n", + "from langchain.llms import Together\n", + "\n", + "# Paste your Together.ai API key here (inside quotes)\n", + "os.environ[\"TOGETHER_API_KEY\"] = \"Token_KEY\"\n", + "\n", + "# Load the LLM from Together.ai (you can switch models later)\n", + "llm = Together(\n", + " model=\"mistralai/Mistral-7B-Instruct-v0.1\",\n", + " temperature=0.3,\n", + " max_tokens=512\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6e52bb9b" + }, + "outputs": [], + "source": [ + "!pip install -q langchain-community" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f9e0ba93" + }, + "outputs": [], + "source": [ + "!pip install -U langchain-together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fe5a52e5" + }, + "outputs": [], + "source": [ + "import os\n", + "from langchain_together import Together\n", + "\n", + "# Paste your Together.ai API key here (inside quotes)\n", + "os.environ[\"TOGETHER_API_KEY\"] = \"Token\"\n", + "\n", + "# Load the LLM from Together.ai (you can switch models later)\n", + "llm = Together(\n", + " model=\"mistralai/Mistral-7B-Instruct-v0.1\",\n", + " temperature=0.3,\n", + " max_tokens=512\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5W6agZt6yPYW" + }, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain\n", + "\n", + "# Define prompt template\n", + "template = \"\"\"\n", + "You are a debugging assistant.\n", + "Given the following test failure and commit diff, determine whether the commit likely caused the failure.\n", + "\n", + "Test Failure:\n", + "{failure}\n", + "\n", + "Commit Diff:\n", + "{diff}\n", + "\n", + "Answer:\n", + "\"\"\"\n", + "\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"failure\", \"diff\"],\n", + " template=template,\n", + ")\n", + "\n", + "test_failure = \"NullPointerException at NullHandler.java:42\"\n", + "commit_diff = \"\"\"\n", + "diff --git a/NullHandler.java b/NullHandler.java\n", + "@@ -40,6 +40,7 @@\n", + " public void handle() {\n", + " Object obj = null;\n", + " obj.toString(); // ← added line\n", + " }\n", + "\"\"\"\n", + "\n", + "\n", + "chain = prompt | llm\n", + "result = chain.invoke({\n", + " \"failure\": test_failure,\n", + " \"diff\": commit_diff\n", + "})\n", + "\n", + "print(\"🤖 GPT Response:\\n\", result)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PnNaGqCW20Rt" + }, + "outputs": [], + "source": [ + "!pip install -q sentence-transformers\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ni717d8U5e1C" + }, + "outputs": [], + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "import faiss\n", + "import numpy as np\n", + "\n", + "# Load open-source embedding model\n", + "embed_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # or use bge-small-en\n", + "\n", + "# Simulate commit diffs\n", + "commit_diffs = [\n", + " (\"sha1\", \"Added null check to NullHandler.java\"),\n", + " (\"sha2\", \"Refactored config loading\"),\n", + " (\"sha3\", \"Removed unused function from TestRunner\"),\n", + " (\"sha4\", \"Introduced obj.toString() in NullHandler.java\"),\n", + " (\"sha5\", \"Added logging in exception handler\")\n", + "]\n", + "\n", + "# Embed each diff\n", + "texts = [text for _, text in commit_diffs]\n", + "embeddings = embed_model.encode(texts, convert_to_numpy=True)\n", + "\n", + "# Build FAISS index\n", + "dimension = embeddings.shape[1]\n", + "index = faiss.IndexFlatL2(dimension)\n", + "index.add(embeddings)\n", + "\n", + "# Store commit metadata separately\n", + "sha_lookup = [sha for sha, _ in commit_diffs]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yQzf5L3l6HHN" + }, + "outputs": [], + "source": [ + "# Test failure text\n", + "test_failure_context = \"NullPointerException at NullHandler.java:42 in handle()\"\n", + "\n", + "# Embed the failure\n", + "query_embedding = embed_model.encode([test_failure_context])\n", + "\n", + "# Search for top 3 most similar diffs\n", + "D, I = index.search(query_embedding, 3)\n", + "top_matches = [(sha_lookup[i], commit_diffs[i][1]) for i in I[0]]\n", + "\n", + "print(\"🔍 Top relevant commit diffs:\\n\")\n", + "for sha, diff in top_matches:\n", + " print(f\"Commit {sha}:\\n{diff}\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0LiPUzbA7-WK" + }, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "template = \"\"\"\n", + "You are a debugging assistant.\n", + "\n", + "A test failed with the following message:\n", + "{failure}\n", + "\n", + "Here is a commit diff that was semantically similar:\n", + "{diff}\n", + "\n", + "Do you think this commit caused the failure? Why or why not?\n", + "\"\"\"\n", + "\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"failure\", \"diff\"],\n", + " template=template,\n", + ")\n", + "\n", + "chain = prompt | llm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I7C-duv58Bdx" + }, + "outputs": [], + "source": [ + "print(\"🤖 GPT-style analysis:\\n\")\n", + "\n", + "for sha, diff in top_matches:\n", + " print(f\"🧩 Commit {sha}:\")\n", + " response = chain.invoke({\n", + " \"failure\": test_failure_context,\n", + " \"diff\": diff\n", + " })\n", + " print(response)\n", + " print(\"-\" * 60)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fGZLq7VdARod" + }, + "outputs": [], + "source": [ + "combined_diff_text = \"\"\n", + "\n", + "for sha, diff in top_matches:\n", + " combined_diff_text += f\"--- Commit {sha} ---\\n{diff}\\n\\n\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fWmt6Ii0ATHp" + }, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "combined_template = \"\"\"\n", + "You are a debugging assistant helping identify the root cause of a test failure.\n", + "\n", + "Here is the test failure:\n", + "{failure}\n", + "\n", + "Here are the top 3 semantically related commits:\n", + "{diffs}\n", + "\n", + "Based on the diff content and the test failure, which commit is most likely responsible? Explain your reasoning.\n", + "\"\"\"\n", + "\n", + "combined_prompt = PromptTemplate(\n", + " input_variables=[\"failure\", \"diffs\"],\n", + " template=combined_template\n", + ")\n", + "\n", + "# Create runnable chain\n", + "combined_chain = combined_prompt | llm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eQ6iKc4iAWSv" + }, + "outputs": [], + "source": [ + "result = combined_chain.invoke({\n", + " \"failure\": test_failure_context,\n", + " \"diffs\": combined_diff_text\n", + "})\n", + "\n", + "print(\"🤖 Final Verdict:\\n\")\n", + "print(result)\n" + ] + } + ], + "metadata": { + "colab": { + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}