Skip to content

Commit 4d44d02

Browse files
committed
Integration
1 parent 7a12fc6 commit 4d44d02

File tree

20 files changed

+293
-310
lines changed

20 files changed

+293
-310
lines changed

docs/retrieval_docs/challenges.md

Lines changed: 0 additions & 105 deletions
This file was deleted.

services/chatbot/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
retrieval/docs

services/chatbot/Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@ RUN apt-get update && apt-get install -y \
99
WORKDIR /app
1010

1111
# Copy the current directory contents into the container at /app
12-
COPY . /app
12+
COPY requirements.txt /app/requirements.txt
1313
# Install any needed dependencies specified in requirements.txt
1414
RUN pip install --no-cache-dir -r requirements.txt
15+
COPY src /app
16+
COPY retrieval /app/retrieval
17+
ENV PYTHONPATH "${PYTHONPATH}:/app"
1518
CMD python3.12 -m gunicorn --bind 0.0.0.0:5002 chatbot_api:app
1619

1720
EXPOSE 5002

services/chatbot/build-image.bat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
@echo off
rem Build the crAPI chatbot Docker image from the chatbot service directory.
cd /d chatbot
rem Stage the docs two directories up into retrieval\docs so the image build
rem can copy the retrieval directory. The previous line here was a stray "m",
rem which is not a command; create the staging directory explicitly instead.
if not exist retrieval mkdir retrieval
xcopy .\..\..\docs\ retrieval\docs\ /E /Y
cmd /c docker build -t crapi/crapi-chatbot:%VERSION% .
cd /d .\..\

services/chatbot/build-image.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
set -x
1717
cd "$(dirname $0)"
18+
mkdir -p retrieval
19+
cp -Rv ../../docs retrieval/
1820
docker build -t crapi/crapi-chatbot:${VERSION:-latest} .
1921
retVal=$?
2022
if [ $retVal -ne 0 ]; then

services/chatbot/chatbot_api.py

Lines changed: 0 additions & 94 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
black

services/chatbot/requirements.txt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
langchain-openai==0.1.1
2-
Flask
3-
langchain
4-
chromadb
5-
markdown
6-
unstructured
1+
chromadb==0.4.24
2+
Flask==3.0.3
3+
langchain==0.1.16
4+
langchain_community==0.0.34
5+
langchain_core==0.1.45
6+
langchain_openai==0.1.3
7+
python-dotenv==1.0.1
8+
unstructured==0.13.3
9+
gunicorn==22.0.0
10+
markdown==3.6
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
from flask import Flask
2+
from flask import request, jsonify
3+
import threading
4+
from langchain_openai import OpenAIEmbeddings
5+
from langchain.chains import RetrievalQAWithSourcesChain, LLMChain
6+
import os
7+
from langchain.memory import ConversationBufferWindowMemory
8+
from langchain_community.vectorstores import Chroma
9+
from langchain_openai import OpenAI
10+
from langchain_community.document_loaders import DirectoryLoader
11+
from langchain.memory import ConversationBufferWindowMemory
12+
from langchain.text_splitter import CharacterTextSplitter
13+
from langchain_core.prompts import PromptTemplate
14+
from langchain import PromptTemplate
15+
from langchain_community.document_loaders import UnstructuredMarkdownLoader
16+
import logging
17+
18+
# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Module logger, set to emit DEBUG and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Configuration read once from the environment at import time.
persist_directory = os.getenv("PERSIST_DIRECTORY")
target_source_chunks = int(os.getenv("TARGET_SOURCE_CHUNKS", 4))

# Mutable state shared by the request handlers. The lock is held by the
# init endpoint while it (re)builds the retriever and the QA chain.
loaded_model_lock = threading.Lock()
loaded_model = False
retriever = None
vulnerable_app_qa = None
28+
29+
30+
def document_loader():
    """Index the markdown files under ``retrieval`` and return a retriever.

    Every ``*.md`` file found recursively under ``retrieval`` is loaded,
    split into chunks of 1000 characters with an overlap of 100, embedded
    with the embeddings returned by ``get_embeddings()`` and written to a
    Chroma store persisted in ``./db``. Any existing ``./db`` directory is
    removed first, so each call rebuilds the index from scratch.

    Returns:
        A retriever over the new store that returns ``target_source_chunks``
        chunks per query.

    Raises:
        Exception: any failure is logged with its traceback and re-raised
            unchanged.
    """
    # Local import so this function does not depend on a file-level import.
    import shutil

    load_dir = "retrieval"
    db_dir = "./db"
    try:
        logger.debug("Loading documents from %s", load_dir)
        loader = DirectoryLoader(
            load_dir, glob="**/*.md", loader_cls=UnstructuredMarkdownLoader
        )
        documents = loader.load()
        logger.debug("Loaded %s documents", len(documents))
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)
        embeddings = get_embeddings()
        # Portable replacement for shelling out to "rm -rf ./db"; a missing
        # directory is not an error, matching the old best-effort behaviour.
        shutil.rmtree(db_dir, ignore_errors=True)
        db = Chroma.from_documents(texts, embeddings, persist_directory=db_dir)
        db.persist()
        return db.as_retriever(search_kwargs={"k": target_source_chunks})
    except Exception as e:
        logger.error("Error loading documents %s", e, exc_info=True)
        # Bare raise keeps the original traceback intact for the caller.
        raise
50+
51+
52+
def get_embeddings():
    """Create and return a default-configured ``OpenAIEmbeddings`` instance."""
    embeddings = OpenAIEmbeddings()
    return embeddings
54+
55+
56+
def get_llm():
    """Return the OpenAI completion model used to answer questions.

    The model is ``gpt-3.5-turbo-instruct`` with a temperature of 0.6.
    """
    return OpenAI(temperature=0.6, model_name="gpt-3.5-turbo-instruct")
59+
60+
61+
def get_qa_chain(llm, retriever):
    """Build the retrieval QA chain that answers chatbot questions.

    Args:
        llm: language model passed to the chain.
        retriever: retriever that supplies the document chunks substituted
            for ``{summaries}`` in the prompt.

    Returns:
        A ``RetrievalQAWithSourcesChain`` of chain type ``"stuff"`` with a
        conversation window memory of the last 6 exchanges, exposed to the
        prompt as ``{chat_history}``.
    """
    # NOTE(review): the leading whitespace of the template lines could not be
    # determined from the reviewed view of this file - confirm it against the
    # original before merging if prompt whitespace matters.
    prompt_template = """
You are a helpful AI Assistant.
{summaries}
Previous Conversations till now: {chat_history}
Reply to this Human question/instruction: {question}.
Chatbot: """
    # Declare every placeholder the template uses; "summaries" was missing
    # from this list even though the template references it.
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["summaries", "question", "chat_history"],
    )
    memory = ConversationBufferWindowMemory(
        memory_key="chat_history", input_key="question", output_key="answer", k=6
    )
    return RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
        memory=memory,
    )
84+
85+
86+
def qa_app(qa, query):
    """Call ``qa`` with ``query`` and return the ``"answer"`` entry of its result."""
    return qa(query)["answer"]
89+
90+
91+
@app.route("/chatbot/genai/init", methods=["POST"])
def init_bot():
    """Initialise the chatbot from a JSON body carrying ``openai_api_key``.

    While holding ``loaded_model_lock`` the handler stores the key in the
    ``OPENAI_API_KEY`` environment variable, rebuilds the retriever and the
    QA chain, and sets the module-level ``loaded_model`` flag.

    Returns:
        ``({"message": "Model Initialized"}, 200)`` on success;
        ``({"message": "openai_api_key not provided"}, 400)`` when the key
        is missing from the body;
        ``({"message": "Not able to initialize model ..."}, 400)`` when any
        step raises.
    """
    # loaded_model must be listed here: without it the assignment below only
    # binds a local and the state endpoint never sees the model as loaded.
    global vulnerable_app_qa, retriever, loaded_model
    try:
        with loaded_model_lock:
            if "openai_api_key" not in request.json:
                # The status code belongs beside jsonify(), not inside it;
                # inside it the code is serialised into the body and the
                # response goes out as HTTP 200.
                return jsonify({"message": "openai_api_key not provided"}), 400
            # Deliberately do not print the key itself: it is a secret.
            print("Initializing bot")
            os.environ["OPENAI_API_KEY"] = request.json["openai_api_key"]
            retriever = document_loader()
            llm = get_llm()
            vulnerable_app_qa = get_qa_chain(llm, retriever)
            loaded_model = True
            return jsonify({"message": "Model Initialized"}), 200
    except Exception as e:
        print("Error initializing bot ", e)
        return jsonify({"message": "Not able to initialize model " + str(e)}), 400
109+
110+
111+
@app.route("/chatbot/genai/state", methods=["GET"])
def state_bot():
    """Report whether the module-level ``loaded_model`` flag is set.

    Returns:
        ``{"message": "Model already loaded"}`` with the default status when
        the flag is truthy, otherwise ``({"message": "Model Error"}, 400)``.
        If the check itself raises, the error text is returned with 400.
    """
    try:
        ready_response = (
            jsonify({"message": "Model already loaded"}) if loaded_model else None
        )
    except Exception as err:
        print("Error checking state ", err)
        return jsonify({"message": "Error checking state " + str(err)}), 400
    if ready_response is None:
        return jsonify({"message": "Model Error"}), 400
    return ready_response
120+
121+
122+
@app.route("/chatbot/genai/ask", methods=["POST"])
def ask_bot():
    """Answer the ``question`` field of the JSON body with the QA chain.

    Returns:
        ``({"answer": ...}, 200)`` on success;
        ``({"message": "question not provided"}, 400)`` when the body is not
        a JSON object containing ``question``;
        ``({"message": "Model not initialized"}, 400)`` when no QA chain has
        been built yet.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # check below reports it as a client error.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "question" not in payload:
        return jsonify({"message": "question not provided"}), 400
    # Before initialisation the chain is still None; calling it would fail
    # with a TypeError and surface as an HTTP 500.
    if vulnerable_app_qa is None:
        return jsonify({"message": "Model not initialized"}), 400
    question = payload["question"]
    answer = qa_app(vulnerable_app_qa, question)
    print("###########################################")
    print("Test Attacker Question: " + str(question))
    print("Vulnerability App Answer: " + str(answer))
    print("###########################################")
    return jsonify({"answer": answer}), 200
132+
133+
134+
if __name__ == "__main__":
    # Runs only when the module is executed directly: start Flask's built-in
    # server on every interface, port 5002, with debug mode enabled.
    run_options = {"host": "0.0.0.0", "port": 5002, "debug": True}
    app.run(**run_options)

0 commit comments

Comments
 (0)