Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
339 changes: 339 additions & 0 deletions CommitHunter/Version1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/RashmiLnu/aqa-test-tools/blob/Google_Collab_Rashmi/Version1.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NW7h6VD2xC6a"
},
"outputs": [],
"source": [
"# Install notebook dependencies.\n",
"# %pip (rather than !pip) installs into the running kernel's environment.\n",
"%pip install -q langchain together openai faiss-cpu tiktoken\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NtPGB4JfxQqt"
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"from langchain.llms import Together\n",
"\n",
"# Never hardcode API keys in a committed notebook. Prompt once and cache\n",
"# the key in the environment so re-running the cell doesn't re-prompt.\n",
"if not os.environ.get(\"TOGETHER_API_KEY\"):\n",
"    os.environ[\"TOGETHER_API_KEY\"] = getpass(\"Enter your Together.ai API key: \")\n",
"\n",
"# Load the LLM from Together.ai (you can switch models later)\n",
"llm = Together(\n",
"    model=\"mistralai/Mistral-7B-Instruct-v0.1\",\n",
"    temperature=0.3,\n",
"    max_tokens=512\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6e52bb9b"
},
"outputs": [],
"source": [
"# langchain-community provides the community LLM integrations.\n",
"# %pip (rather than !pip) installs into the running kernel's environment.\n",
"%pip install -q langchain-community"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "f9e0ba93"
},
"outputs": [],
"source": [
"# langchain-together is the maintained Together.ai integration package.\n",
"# TODO: pin a version for reproducibility once one is validated.\n",
"%pip install -q -U langchain-together"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fe5a52e5"
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"from langchain_together import Together\n",
"\n",
"# Never hardcode API keys in a committed notebook. Prompt once and cache\n",
"# the key in the environment so re-running the cell doesn't re-prompt.\n",
"if not os.environ.get(\"TOGETHER_API_KEY\"):\n",
"    os.environ[\"TOGETHER_API_KEY\"] = getpass(\"Enter your Together.ai API key: \")\n",
"\n",
"# Load the LLM from Together.ai (you can switch models later)\n",
"llm = Together(\n",
"    model=\"mistralai/Mistral-7B-Instruct-v0.1\",\n",
"    temperature=0.3,\n",
"    max_tokens=512\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5W6agZt6yPYW"
},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"\n",
"# Define prompt template\n",
"template = \"\"\"\n",
"You are a debugging assistant.\n",
"Given the following test failure and commit diff, determine whether the commit likely caused the failure.\n",
"\n",
"Test Failure:\n",
"{failure}\n",
"\n",
"Commit Diff:\n",
"{diff}\n",
"\n",
"Answer:\n",
"\"\"\"\n",
"\n",
"prompt = PromptTemplate(\n",
"    input_variables=[\"failure\", \"diff\"],\n",
"    template=template,\n",
")\n",
"\n",
"# Toy example: a commit that dereferences null at the exact failing line.\n",
"test_failure = \"NullPointerException at NullHandler.java:42\"\n",
"commit_diff = \"\"\"\n",
"diff --git a/NullHandler.java b/NullHandler.java\n",
"@@ -40,6 +40,7 @@\n",
" public void handle() {\n",
" Object obj = null;\n",
" obj.toString(); // ← added line\n",
" }\n",
"\"\"\"\n",
"\n",
"# LCEL pipeline (PromptTemplate | LLM); the legacy LLMChain import was\n",
"# unused and has been removed.\n",
"chain = prompt | llm\n",
"result = chain.invoke({\n",
"    \"failure\": test_failure,\n",
"    \"diff\": commit_diff\n",
"})\n",
"\n",
"print(\"🤖 LLM Response:\\n\", result)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PnNaGqCW20Rt"
},
"outputs": [],
"source": [
"# sentence-transformers supplies the open-source embedding model used below.\n",
"# %pip (rather than !pip) installs into the running kernel's environment.\n",
"%pip install -q sentence-transformers\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ni717d8U5e1C"
},
"outputs": [],
"source": [
"from sentence_transformers import SentenceTransformer\n",
"import faiss\n",
"import numpy as np\n",
"\n",
"# Load open-source embedding model\n",
"embed_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # or use bge-small-en\n",
"\n",
"# Simulate commit diffs\n",
"commit_diffs = [\n",
"    (\"sha1\", \"Added null check to NullHandler.java\"),\n",
"    (\"sha2\", \"Refactored config loading\"),\n",
"    (\"sha3\", \"Removed unused function from TestRunner\"),\n",
"    (\"sha4\", \"Introduced obj.toString() in NullHandler.java\"),\n",
"    (\"sha5\", \"Added logging in exception handler\")\n",
"]\n",
"\n",
"# Embed each diff\n",
"texts = [text for _, text in commit_diffs]\n",
"embeddings = embed_model.encode(texts, convert_to_numpy=True)\n",
"# FAISS requires float32, C-contiguous input; make that explicit rather\n",
"# than relying on the encoder's default output dtype.\n",
"embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)\n",
"\n",
"# Build FAISS index (exact L2 search over the diff embeddings)\n",
"dimension = embeddings.shape[1]\n",
"index = faiss.IndexFlatL2(dimension)\n",
"index.add(embeddings)\n",
"\n",
"# Store commit metadata separately\n",
"sha_lookup = [sha for sha, _ in commit_diffs]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yQzf5L3l6HHN"
},
"outputs": [],
"source": [
"# Test failure text\n",
"test_failure_context = \"NullPointerException at NullHandler.java:42 in handle()\"\n",
"\n",
"# Embed the failure (FAISS needs float32, C-contiguous input)\n",
"query_embedding = np.ascontiguousarray(\n",
"    embed_model.encode([test_failure_context]), dtype=np.float32\n",
")\n",
"\n",
"# Search for the top-k most similar diffs. Bound k by the index size:\n",
"# FAISS pads missing results with label -1, which would otherwise pick the\n",
"# *last* commit via Python's negative indexing.\n",
"k = min(3, index.ntotal)\n",
"distances, indices = index.search(query_embedding, k)\n",
"top_matches = [(sha_lookup[i], commit_diffs[i][1]) for i in indices[0]]\n",
"\n",
"print(\"🔍 Top relevant commit diffs:\\n\")\n",
"for sha, diff in top_matches:\n",
"    print(f\"Commit {sha}:\\n{diff}\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0LiPUzbA7-WK"
},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"\n",
"# Per-commit prompt: ask the model whether one diff explains the failure.\n",
"template = \"\"\"\n",
"You are a debugging assistant.\n",
"\n",
"A test failed with the following message:\n",
"{failure}\n",
"\n",
"Here is a commit diff that was semantically similar:\n",
"{diff}\n",
"\n",
"Do you think this commit caused the failure? Why or why not?\n",
"\"\"\"\n",
"\n",
"# from_template infers the {failure} and {diff} input variables.\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"# Runnable pipeline: prompt formatting feeds straight into the LLM.\n",
"chain = prompt | llm\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "I7C-duv58Bdx"
},
"outputs": [],
"source": [
"print(\"🤖 GPT-style analysis:\\n\")\n",
"\n",
"# Ask the chain about each retrieved commit in turn and print its verdict.\n",
"separator = \"-\" * 60\n",
"for commit_sha, commit_diff in top_matches:\n",
"    print(f\"🧩 Commit {commit_sha}:\")\n",
"    verdict = chain.invoke({\"failure\": test_failure_context, \"diff\": commit_diff})\n",
"    print(verdict)\n",
"    print(separator)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fGZLq7VdARod"
},
"outputs": [],
"source": [
"# Concatenate the top matches into one labelled text blob for the\n",
"# combined prompt in the next cells.\n",
"combined_diff_text = \"\".join(\n",
"    f\"--- Commit {sha} ---\\n{diff}\\n\\n\" for sha, diff in top_matches\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fWmt6Ii0ATHp"
},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"\n",
"# Combined prompt: show all retrieved diffs at once and ask for the\n",
"# single most likely culprit.\n",
"combined_template = \"\"\"\n",
"You are a debugging assistant helping identify the root cause of a test failure.\n",
"\n",
"Here is the test failure:\n",
"{failure}\n",
"\n",
"Here are the top 3 semantically related commits:\n",
"{diffs}\n",
"\n",
"Based on the diff content and the test failure, which commit is most likely responsible? Explain your reasoning.\n",
"\"\"\"\n",
"\n",
"# from_template infers the {failure} and {diffs} input variables.\n",
"combined_prompt = PromptTemplate.from_template(combined_template)\n",
"\n",
"# Create runnable chain\n",
"combined_chain = combined_prompt | llm\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eQ6iKc4iAWSv"
},
"outputs": [],
"source": [
"# Ask the LLM for one verdict across all retrieved commits.\n",
"payload = {\"failure\": test_failure_context, \"diffs\": combined_diff_text}\n",
"result = combined_chain.invoke(payload)\n",
"\n",
"print(\"🤖 Final Verdict:\\n\")\n",
"print(result)\n"
]
}
],
"metadata": {
"colab": {
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading