Skip to content
197 changes: 197 additions & 0 deletions supporting-blog-content/github-assistant/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import logging
import sys
import os
import pandas as pd
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Response
from llama_index.core.evaluation import (
DatasetGenerator,
RelevancyEvaluator,
FaithfulnessEvaluator,
EvaluationResult,
)
from llama_index.llms.openai import OpenAI
from tabulate import tabulate
import textwrap
import argparse
import traceback
from httpx import ReadTimeout

# Send log records to stdout so they interleave with the print() output below.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig already installs a stdout handler on the root
# logger; this adds a second one, so every record prints twice — confirm
# whether the duplication is intentional.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# CLI flags selecting which slice of documents/questions gets evaluated.
parser = argparse.ArgumentParser(
    description="Process documents and questions for evaluation."
)
parser.add_argument(
    "--num_documents",
    type=int,
    default=None,
    help="Number of documents to process (default: all)",
)
parser.add_argument(
    "--skip_documents",
    type=int,
    default=0,
    help="Number of documents to skip at the beginning (default: 0)",
)
parser.add_argument(
    "--num_questions",
    type=int,
    default=None,
    help="Number of questions to process (default: all)",
)
parser.add_argument(
    "--skip_questions",
    type=int,
    default=0,
    help="Number of questions to skip at the beginning (default: 0)",
)
parser.add_argument(
    "--process_last_questions",
    action="store_true",
    help="Process last N questions instead of first N",
)
args = parser.parse_args()

load_dotenv(".env")

# Load the corpus the questions will be generated from and evaluated against.
reader = SimpleDirectoryReader("/tmp/elastic/production-readiness-review")
documents = reader.load_data()

# Preview up to the first three documents. The previous version indexed
# documents[0..2] unconditionally (raising IndexError on small corpora) and
# misspelled "Third".
for _label, _doc in zip(("First", "Second", "Third"), documents):
    print(f"{_label} document: {_doc.text}")


if args.skip_documents > 0:
    documents = documents[args.skip_documents :]

if args.num_documents is not None:
    documents = documents[: args.num_documents]

print(f"Number of documents loaded: {len(documents)}")

# LLM used both for question generation and as the evaluation judge.
llm = OpenAI(model="gpt-4o", request_timeout=120)

data_generator = DatasetGenerator.from_documents(documents, llm=llm)

try:
    eval_questions = data_generator.generate_questions_from_nodes()
    # The generator may return one newline-separated string or a list of
    # questions; normalize to a list and drop blank entries.
    if isinstance(eval_questions, str):
        all_questions = eval_questions.strip().split("\n")
    else:
        all_questions = list(eval_questions)
    all_questions = [q for q in all_questions if q.strip()]

    # Print the full set before slicing. The previous version printed
    # "\All ..." (missing newline escape) and iterated the raw
    # eval_questions, which walks characters when it is a single string.
    print("\nAll available questions generated:")
    for idx, q in enumerate(all_questions):
        print(f"{idx}. {q}")

    # Apply the CLI skip/limit window.
    eval_questions_list = all_questions
    if args.skip_questions > 0:
        eval_questions_list = eval_questions_list[args.skip_questions :]

    if args.num_questions is not None:
        if args.process_last_questions:
            eval_questions_list = eval_questions_list[-args.num_questions :]
        else:
            eval_questions_list = eval_questions_list[: args.num_questions]

    print("\nGenerated questions:")
    for idx, q in enumerate(eval_questions_list, start=1):
        print(f"{idx}. {q}")
except ReadTimeout:
    # Message previously referenced Ollama; the configured LLM is OpenAI.
    print(
        "Request to OpenAI timed out during question generation. Please check the server or increase the timeout duration."
    )
    traceback.print_exc()
    sys.exit(1)
except Exception as e:
    print(f"An error occurred while generating questions: {e}")
    traceback.print_exc()
    sys.exit(1)

print(f"\nTotal number of questions generated: {len(eval_questions_list)}")

# LLM-as-judge evaluators: relevancy (answer addresses the query) and
# faithfulness (answer is grounded in the retrieved source context).
evaluator_relevancy = RelevancyEvaluator(llm=llm)
evaluator_faith = FaithfulnessEvaluator(llm=llm)

# In-memory vector index the query engine retrieves from.
vector_index = VectorStoreIndex.from_documents(documents)


def display_eval_df(
    query: str,
    response: Response,
    eval_result_relevancy: EvaluationResult,
    eval_result_faith: EvaluationResult,
) -> None:
    """Render one question's evaluation results as a grid table on stdout."""

    def _wrap(text, width=50):
        # Normalize to str, strip carriage returns, wrap every line to
        # `width`, and keep a blank separator line after each original line.
        if text is None:
            return ""
        pieces = []
        for raw_line in str(text).replace("\r", "").split("\n"):
            pieces.extend(textwrap.wrap(raw_line, width=width))
            pieces.append("")
        return "\n".join(pieces)

    rel_pass = getattr(eval_result_relevancy, "passing", False)
    rel_feedback = getattr(eval_result_relevancy, "feedback", "")
    faith_pass = getattr(eval_result_faith, "passing", False)
    faith_feedback = getattr(eval_result_faith, "feedback", "")

    # Show the top retrieved chunk, when any source node came back.
    source_content = ""
    if response.source_nodes:
        source_content = _wrap(response.source_nodes[0].node.get_content())

    row = {
        "Query": _wrap(query),
        "Response": _wrap(str(response)),
        "Source": source_content,
        "Relevancy Response": "Pass" if rel_pass else "Fail",
        "Relevancy Feedback": _wrap(rel_feedback),
        "Relevancy Score": _wrap(str(1.0 if rel_pass else 0.0)),
        "Faith Response": "Pass" if faith_pass else "Fail",
        "Faith Feedback": _wrap(faith_feedback),
    }

    print("\nEvaluation Result:")
    print(
        tabulate(
            pd.DataFrame([row]),
            headers="keys",
            tablefmt="grid",
            showindex=False,
            stralign="left",
        )
    )


# Query engine that answers each generated question from the vector index.
query_engine = vector_index.as_query_engine(llm=llm)

total_questions = len(eval_questions_list)
# Evaluate every question; a failure on one question is logged and skipped
# so the remaining questions still run.
for idx, question in enumerate(eval_questions_list, start=1):
    try:
        response_vector = query_engine.query(question)
        eval_result_relevancy = evaluator_relevancy.evaluate_response(
            query=question, response=response_vector
        )
        eval_result_faith = evaluator_faith.evaluate_response(response=response_vector)

        print(f"\nProcessing Question {idx} of {total_questions}:")
        display_eval_df(
            question, response_vector, eval_result_relevancy, eval_result_faith
        )
    except ReadTimeout as e:
        print(f"Request to OpenAI timed out while processing question {idx}.")
        traceback.print_exc()
        continue
    except Exception as e:
        print(f"An error occurred while processing question {idx}: {e}")
        traceback.print_exc()
        continue
201 changes: 201 additions & 0 deletions supporting-blog-content/github-assistant/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
from llama_index.core import (
Document,
Settings,
SimpleDirectoryReader,
StorageContext,
VectorStoreIndex,
)
from llama_index.core.node_parser import (
SentenceSplitter,
CodeSplitter,
MarkdownNodeParser,
JSONNodeParser,
)
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from dotenv import load_dotenv
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.ingestion import IngestionPipeline
import tree_sitter_python as tspython
from tree_sitter_languages import get_parser, get_language
from tree_sitter import Parser, Language
import logging
import nest_asyncio
import elastic_transport
import sys
import subprocess
import shutil
import time
import glob
import os


# Allow re-entrant event loops (llama_index runs async code internally).
nest_asyncio.apply()

load_dotenv(".env")

# Global llama_index settings: embedding model and chunking parameters
# shared by all splitters below.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.chunk_lines = 1024
Settings.chunk_size = 1024
Settings.chunk_lines_overlap = 20
Settings.max_chars = 1500


def clone_repository(owner, repo, branch, base_path="/tmp"):
    """Clone github.com/{owner}/{repo} at *branch* into base_path/owner/repo.

    Falls back to the GITHUB_BRANCH environment variable when *branch* is
    falsy. If the target directory already exists the clone is skipped.
    Retries the clone up to three times, sleeping 10 s between attempts.

    Returns:
        The local repository path.

    Raises:
        ValueError: if no branch is provided and GITHUB_BRANCH is unset.
        Exception: if every clone attempt fails.
    """
    branch = branch or os.getenv("GITHUB_BRANCH")
    if not branch:
        raise ValueError(
            "Branch is not provided and GITHUB_BRANCH environment variable is not set."
        )

    local_repo_path = os.path.join(base_path, owner, repo)
    clone_url = f"https://github.com/{owner}/{repo}.git"

    if os.path.exists(local_repo_path):
        print(f"Repository already exists at {local_repo_path}. Skipping clone.")
        return local_repo_path

    attempts = 3
    for attempt in range(1, attempts + 1):
        try:
            os.makedirs(local_repo_path, exist_ok=True)
            print(f"Attempting to clone repository... Attempt {attempt}")
            subprocess.run(
                ["git", "clone", "-b", branch, clone_url, local_repo_path], check=True
            )
            print(f"Repository cloned into {local_repo_path}.")
            return local_repo_path
        except subprocess.CalledProcessError as err:
            if attempt == attempts:
                # Out of retries: fail immediately. The previous version
                # slept 10 s even after the final failed attempt.
                raise Exception(
                    "Failed to clone repository after multiple attempts"
                ) from err
            print(f"Attempt {attempt} failed, retrying...")
            time.sleep(10)


def print_docs_and_nodes(docs, nodes):
    """Dump every document and parsed node to stdout for manual inspection."""
    sections = (
        ("Documents", "Document", docs, lambda item: (item.doc_id, item.text)),
        ("Nodes", "Node", nodes, lambda item: (item.id_, item.text)),
    )
    for title, label, items, extract in sections:
        print(f"\n=== {title} ===\n")
        for item in items:
            identifier, content = extract(item)
            print(f"{label} ID: {identifier}")
            print(f"{label} Content:\n{content}\n\n---\n")


def collect_and_print_file_summary(file_summary):
    """Print each collected per-extension summary line under a header."""
    print("\n=== File Summary ===\n")
    for summary_line in file_summary:
        print(summary_line)


def parse_documents():
    """Clone the configured GitHub repository and split its files into nodes.

    Reads GITHUB_OWNER / GITHUB_REPO / GITHUB_BRANCH (and optional BASE_PATH,
    default "/tmp") from the environment, runs one splitter per supported
    file type, prints the documents/nodes and a per-extension summary, and
    returns the combined list of parsed nodes.

    Raises:
        ValueError: if GITHUB_OWNER or GITHUB_REPO is not set.
    """
    owner = os.getenv("GITHUB_OWNER")
    repo = os.getenv("GITHUB_REPO")
    branch = os.getenv("GITHUB_BRANCH")
    base_path = os.getenv("BASE_PATH", "/tmp")

    if not owner or not repo:
        raise ValueError(
            "GITHUB_OWNER and GITHUB_REPO environment variables must be set."
        )

    local_repo_path = clone_repository(owner, repo, branch, base_path)

    nodes = []
    file_summary = []

    # Tree-sitter parsers backing the CodeSplitters below.
    ts_parser = get_parser("typescript")
    py_parser = get_parser("python")
    go_parser = get_parser("go")
    js_parser = get_parser("javascript")
    bash_parser = get_parser("bash")
    yaml_parser = get_parser("yaml")

    # One (splitter, extensions) pair per supported file type.
    # FIX: the shell extension was previously ",sh", which never matched
    # any file; corrected to ".sh".
    parsers_and_extensions = [
        (SentenceSplitter(), [".md"]),
        (CodeSplitter(language="python", parser=py_parser), [".py", ".ipynb"]),
        (CodeSplitter(language="typescript", parser=ts_parser), [".ts"]),
        (CodeSplitter(language="go", parser=go_parser), [".go"]),
        (CodeSplitter(language="javascript", parser=js_parser), [".js"]),
        (CodeSplitter(language="bash", parser=bash_parser), [".bash", ".sh"]),
        (CodeSplitter(language="yaml", parser=yaml_parser), [".yaml", ".yml"]),
        (JSONNodeParser(), [".json"]),
    ]

    for parser, extensions in parsers_and_extensions:
        # Count matches first so we can skip loading when nothing matches.
        matching_files = []
        for ext in extensions:
            matching_files.extend(
                glob.glob(f"{local_repo_path}/**/*{ext}", recursive=True)
            )

        extension_list = ", ".join(extensions)
        if matching_files:
            file_summary.append(
                f"Found {len(matching_files)} {extension_list} files in the repository."
            )

            loader = SimpleDirectoryReader(
                input_dir=local_repo_path, required_exts=extensions, recursive=True
            )
            docs = loader.load_data()
            parsed_nodes = parser.get_nodes_from_documents(docs)

            print_docs_and_nodes(docs, parsed_nodes)

            nodes.extend(parsed_nodes)
        else:
            file_summary.append(f"No {extension_list} files found in the repository.")

    collect_and_print_file_summary(file_summary)
    print("\n")
    return nodes


def get_es_vector_store():
    """Create the Elasticsearch vector store, retrying on connection timeouts.

    Reads ELASTIC_CLOUD_ID / ELASTIC_USER / ELASTIC_PASSWORD / ELASTIC_INDEX
    from the environment. Retries up to 20 times, sleeping 10 s between
    attempts.

    Returns:
        An initialized ElasticsearchStore.

    Raises:
        Exception: if every connection attempt times out.
    """
    print("Initializing Elasticsearch store...")
    es_cloud_id = os.getenv("ELASTIC_CLOUD_ID")
    es_user = os.getenv("ELASTIC_USER")
    es_password = os.getenv("ELASTIC_PASSWORD")
    index_name = os.getenv("ELASTIC_INDEX")

    retries = 20
    for attempt in range(1, retries + 1):
        try:
            es_vector_store = ElasticsearchStore(
                index_name=index_name,
                es_cloud_id=es_cloud_id,
                es_user=es_user,
                es_password=es_password,
                batch_size=100,
            )
            print("Elasticsearch store initialized.")
            return es_vector_store
        except elastic_transport.ConnectionTimeout:
            print(f"Connection attempt {attempt}/{retries} timed out. Retrying...")
            if attempt == retries:
                # Out of retries: fail below. The previous version slept
                # 10 s even after the final failed attempt.
                break
            time.sleep(10)
    raise Exception("Failed to initialize Elasticsearch store after multiple attempts")


def main():
    """Parse repository files into nodes and ingest them into Elasticsearch."""
    parsed_nodes = parse_documents()
    store = get_es_vector_store()

    try:
        ingestion = IngestionPipeline(
            vector_store=store,
        )

        ingestion.run(documents=parsed_nodes, show_progress=True)
    finally:
        # Best-effort cleanup: close the store when the client exposes close().
        close = getattr(store, "close", None)
        if close is not None:
            close()
            print("Elasticsearch connection closed.")


if __name__ == "__main__":
    main()
Loading
Loading