Skip to content

Commit 4aea91f

Browse files
committed
Initial release
0 parents  commit 4aea91f

File tree

19 files changed

+1571
-0
lines changed

19 files changed

+1571
-0
lines changed

changelog.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# :blue[Version 0.0.2]
2+
### Agents
3+
- :green[**[+] CSV Agent** ]
4+
5+
### File Parsing
6+
- :green[**[+] CSV :** ] LOCAL
7+
8+
### General
9+
* Parse **Tables** from text
10+
* Parse **Charts** from text
11+
* Auto agent selection based on input data
12+
* Agent annotation
13+
* **Thoughts** and **Observations** are now integrated into agent output
14+
* Save agent chat in the same session
15+
* Processing files progress bar
16+
* Preprocessed data via Trained Button
17+
* Reset agent chat
18+
---
19+
20+
# :blue[Version 0.0.1]
21+
### Tools
22+
- :green[**[+] Vector Store Retriever** ]
23+
### Agents
24+
* :green[**[+] Conversational Retrieval Agent** ]
25+
### File Parsing
26+
- :green[**[+] PDF :** ] PyPDFLoader ➜ [VECTOR]
27+
- :green[**[+] PNG, JPG, JPEG :** ] Unstructured Image Loader ➜ [VECTOR]
28+
- :green[**[+] DOCX :** ] Docx2Txt ➜ [VECTOR]
29+
- :green[**[+] MP3 :** ] OpenAI Whisper API ➜ [VECTOR]
30+
- :green[**[+] TXT** ][VECTOR]

dataset/trained/tables/medical.csv

Lines changed: 463 additions & 0 deletions
Large diffs are not rendered by default.

dataset/trained/vector/index.faiss

6.04 KB
Binary file not shown.

dataset/trained/vector/index.pkl

372 Bytes
Binary file not shown.

init.py

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
import streamlit as st
2+
3+
from dotenv import load_dotenv
4+
from langchain.text_splitter import CharacterTextSplitter
5+
from langchain.embeddings import OpenAIEmbeddings
6+
from langchain.vectorstores import FAISS
7+
from annotated_text import annotated_text,annotation
8+
from utils.changelog import changelog_markdown
9+
from utils.session_state import *
10+
from utils.docs_parse import *
11+
from utils.callback import CustomHandler
12+
from utils.helpers import *
13+
from models.agents import agents_classes
14+
from utils.multi_modal import st_multi_modal
15+
load_dotenv()
16+
17+
18+
import json
19+
20+
def delete_messages():
    """Clear the chat history and the active agent's memory, if it has one."""
    try:
        messages_session_state().clear()
        executor_session_state().memory.clear()
    except Exception:
        # Not every agent carries a memory object; best-effort cleanup only.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        print('No Memory for agent')
26+
def get_vectorstore(documents):
    """Split documents into chunks and embed them into a FAISS vector store.

    Args:
        documents: Sequence of langchain Document objects to index.

    Returns:
        A FAISS vector store, or False when splitting yields no chunks.
    """
    embeddings = OpenAIEmbeddings()
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        separator="\n",
    )

    docs = text_splitter.split_documents(documents=documents)
    if not docs:
        return False

    for idx, doc in enumerate(docs, start=1):
        doc.metadata['doc_id'] = idx
        # Keep only the file name. The original split on "\\" only, which
        # left full paths intact on POSIX; normalize separators first.
        doc.metadata['source'] = doc.metadata['source'].replace("\\", "/").split("/")[-1]
    return FAISS.from_documents(documents=docs, embedding=embeddings)
43+
44+
45+
def get_conversation_chain():
    """Build per-agent conversation chains for the current data type.

    Loads the resources that may exist for ``st.session_state.data_type``
    (FAISS vector store, CSV table names, image metadata) and instantiates
    every agent in ``agents_classes`` whose required arguments are all
    available. The result is stored in
    ``st.session_state['conversation_chain']``.

    Returns:
        bool: True when at least one agent could be built, False otherwise.
    """
    st.progress(100, text='Getting Agents')
    datatype = st.session_state.data_type
    embeddings = OpenAIEmbeddings()

    try:
        vectorstore = FAISS.load_local(f"dataset/{datatype}/vector", embeddings)
    except Exception:  # no vector store on disk for this data type
        vectorstore = False
    try:
        csvs = get_file_names(f"dataset/{datatype}/tables")
    except Exception:  # no tables directory
        csvs = False
    try:
        # Use a context manager so the file handle is always closed
        # (the original left it open).
        with open(f'dataset/{datatype}/images/metadata.json', 'r') as fh:
            images = json.load(fh)
    except Exception:  # no image metadata file
        images = False

    # Map agent argument names to loaded resources explicitly instead of
    # eval()-ing the argument strings against local variables (the original
    # also called eval twice per argument). Unknown names simply exclude
    # the agent rather than raising NameError.
    resources = {'vectorstore': vectorstore, 'csvs': csvs, 'images': images}

    conversation_chain = {}
    for agent_name, spec in agents_classes.items():
        parameters = {}
        included = True
        for arg in spec['arguments']:
            value = resources.get(arg, False)
            if not value:
                included = False
                break
            parameters[arg] = value
        if included:
            conversation_chain[agent_name] = {
                "executor": spec['func'](**parameters),
                "messages": [],
            }

    if not conversation_chain:
        return False
    st.session_state['conversation_chain'] = conversation_chain
    return True
81+
def visualize(user_question):
    """Run the active agent on *user_question*, streaming output into a container."""
    container = st.container()
    handler = CustomHandler(message_placeholder=container)
    executor = executor_session_state()
    return executor({"input": user_question}, callbacks=[handler])
87+
88+
89+
90+
def handle_userinput(user_question):
    """Echo the user's message, invoke the agent, and render source buttons."""
    with st.chat_message("user"):
        st.markdown(user_question)

    history = messages_session_state()
    history.append({"role": "user", "content": user_question})
    history.append({"role": "assistant", "content": ""})

    with st.chat_message("assistant"):
        visualize(user_question=user_question)
        # The callback fills in the assistant entry appended above.
        last_message = messages_session_state()[-1]
        if "source_documents" in last_message:
            display_buttons_in_columns(3, last_message["source_documents"])
101+
102+
103+
104+
def process(files):
    """Parse uploaded files and build the 'process' dataset on disk.

    Text-like files are parsed into langchain documents and embedded into a
    FAISS store under dataset/process/vector; CSV files are handled by
    parse_csv (they feed the tables directory, not the vector store).

    Args:
        files: Uploaded file objects (each with a ``name`` attribute).
    """
    documents = []
    remove_dir('dataset/process')
    os.makedirs('dataset/process/tables')
    os.makedirs('dataset/process/images')
    os.makedirs('dataset/process/vector')

    for i, file in enumerate(files):
        st.progress(i / len(files), text=f'Processing {file.name}')
        name = file.name
        # NOTE: '.links.txt' must be checked before the generic '.txt'.
        if name.endswith('.pdf'):
            documents.extend(parse_pdf(file))
        elif name.endswith('.csv'):
            parse_csv(file)
        elif name.endswith('.pptx'):
            documents.extend(parse_pptx(file))
        elif name.endswith('.links.txt'):
            documents.extend(parse_links(file))
        elif name.endswith('.txt'):
            documents.extend(parse_txt(file))
        elif name.endswith('.docx'):
            documents.extend(parse_docx(file))
        elif name.endswith(('.png', '.jpg', '.jpeg')):
            documents.extend(parse_image(file))
        elif name.endswith('.mp3'):
            documents.extend(parse_audio(file))
        st.session_state.files.append(name)
    remove_dir('temp')

    # Build and persist the vector store (False when nothing was parsed).
    vectorstore = get_vectorstore(documents)
    if vectorstore:
        vectorstore.save_local("dataset/process/vector")
153+
def show_source(source, documents):
    """Render in the sidebar the document chunks that back *source*."""
    with st.sidebar:
        st.subheader(f"Source: {source}")
        for document in documents:
            st.write(f"...{document.page_content}...")
            st.write('----')
159+
160+
# Monotonically increasing counter so every button gets a unique widget key
# across reruns.
count = 0


def display_buttons_in_columns(num_columns, values):
    """Lay out one button per source in a grid with *num_columns* columns.

    Args:
        num_columns: Number of columns per row.
        values: Mapping of source name -> supporting documents; clicking a
            button shows those documents via ``show_source``.
    """
    global count

    sources = list(values.keys())
    num_rows = -(-len(sources) // num_columns)  # ceiling division
    # (Removed unused local `col_width = 12 // num_columns`.)

    for row in range(num_rows):
        cols = st.columns(num_columns)
        for col_idx, col in enumerate(cols):
            value_idx = row * num_columns + col_idx
            if value_idx < len(sources):
                source = sources[value_idx]
                count += 1
                col.button(source, key=f'b{count}', use_container_width=True,
                           on_click=show_source, args=(source, values[source],))
178+
def agent_changed():
    """Selectbox callback: record that the user picked a different agent."""
    st.session_state["agent_changed"] = True
180+
181+
182+
def main():
    """App entry point: upload/training screen first, then the agent chat UI."""
    st.set_page_config(page_title="Chat with Anything",
                       page_icon=":exploding_head:")

    init_session_state()

    subheader = st.empty()
    place = st.empty()

    with place:
        annotated_text(
            annotation("Chat with Anything", background="transparent", fontSize="40px", fontWeight="bold"),
            annotation("pre-alpha", "v0.0.2", background="#afa", fontSize="18px"),
        )

    if not st.session_state.processed:
        # --- Upload / training screen ---
        remove_dir('output')
        remove_dir('dataset/process')
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your Documents here and click on 'Process'",
            accept_multiple_files=True,
            type=["txt", "pdf", "png", "mp3", "docx", "csv", "jpg"])
        process_button = st.button("Process", use_container_width=True, type='primary')

        trained_button = st.button("Trained Data", use_container_width=True)

        if process_button:
            my_bar = st.progress(0, text="Operation in progress")
            with my_bar:
                process(pdf_docs)
                st.session_state.data_type = "process"
                c = get_conversation_chain()
                if c:
                    st.session_state.processed = True
                    st.experimental_rerun()
                else:
                    st.session_state.data_type = None
                    # Fixed typo: was 'No Agents Avaialable'.
                    st.error('No Agents Available')
        if trained_button:
            st.session_state.data_type = "trained"
            get_conversation_chain()
            st.session_state.processed = True
            st.experimental_rerun()
        with st.expander("## ChangeLog"):
            st.markdown(changelog_markdown)
    else:
        # --- Chat screen ---
        with st.sidebar:
            if st.button('Retry', type="primary", use_container_width=True):
                reset_session_state()

            with st.expander("Uploaded Files"):
                st.write(', '.join(st.session_state.files))
            option = st.selectbox(
                "Select an Agent",
                st.session_state.conversation_chain.keys(),
                placeholder="Select Your Agent",
                on_change=agent_changed,
            )

        if option:
            # NOTE(review): "annotated" is a code string produced by
            # utils.helpers.annotate and executed with eval(). eval on
            # generated code is fragile and unsafe; consider having
            # annotate() render the badges directly instead.
            eval(agents_classes[option]["annotated"])
            if st.session_state.agent != option:
                change_agent_session_state(option)
            with place:
                col1, col2 = st.columns([11, 1])
                with col1:
                    annotated_text(
                        annotation(f"""{option}""", background="transparent", fontSize="28px", fontWeight="bold"),
                    )
                with col2:
                    st.button('↺', type="primary", use_container_width=True, on_click=delete_messages)
            with subheader:
                pass

        # Replay the stored conversation for the active agent.
        for message in messages_session_state():
            with st.chat_message(message["role"]):
                placeholder = st.container()
                st_multi_modal(placeholder, message["content"], [])

                if "source_documents" in message:
                    display_buttons_in_columns(3, message["source_documents"])

        user_question = st.chat_input("Ask a question about your documents:")

        if user_question:
            handle_userinput(user_question)
273+
274+
if __name__ == '__main__':
    main()

# Example query kept for manual testing:
#"show in a histogram sbp as a function of age with 10 years bins"

models/agents/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os
2+
import glob
3+
import importlib
4+
from utils.helpers import annotate
5+
6+
# Get a list of all Python files (agents) in the current directory
7+
agent_files = glob.glob(os.path.dirname(__file__) + "/*.py")
8+
9+
# Exclude __init__.py itself
10+
agent_files = [f for f in agent_files if not f.endswith("__init__.py") and not '$' in f]
11+
12+
# Import all agent modules dynamically and populate the agents_classes dictionary
13+
agents_classes = {}
14+
for agent_file in agent_files:
15+
module_name = os.path.basename(agent_file)[:-3] # Remove '.py' extension
16+
module = importlib.import_module(f".{module_name}", package=__name__)
17+
18+
#agent_class = getattr(module, f"Agent{module_name[-1]}") # Assuming the class name follows a pattern
19+
agents_classes[f'{module.name}'] = {'func': module.agent,'arguments':module.arguments,'annotated':annotate(module.annotated,module.arguments)}
20+
21+
# Expose the agents_classes dictionary as part of the package's public interface
22+
__all__ = ['agents_classes']

models/agents/agent_1.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name = "Agent 1: Conservational Agent"
2+
arguments = ["vectorstore"]
3+
annotated = ["OpenAI Agent","Chat LLM","Retriever Tool","Memory"]
4+
5+
from langchain.agents.agent_toolkits import create_retriever_tool
6+
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
7+
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
8+
from langchain.schema.messages import SystemMessage
9+
from langchain.prompts import MessagesPlaceholder
10+
from langchain.agents import AgentExecutor
11+
from models.llms.llms import *
12+
13+
14+
15+
def agent(vectorstore):
    """Build a conversational retrieval agent over *vectorstore*.

    Wires a retriever tool, a system prompt with chat-history placeholder,
    token-buffer memory, and the shared chat LLM into an AgentExecutor that
    also returns its intermediate steps.
    """
    retriever_tool = create_retriever_tool(
        vectorstore.as_retriever(),
        "search",
        "Searches and returns documents based on knowledge base."
    )
    toolset = [retriever_tool]

    history_key = "chat_history"
    instructions = SystemMessage(
        content=(
            "Do your best to answer the questions. "
            "Only use the tools to search for "
            "relevant information. Answers must be based ONLY on the tools"
        )
    )
    agent_prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=instructions,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=history_key)],
    )
    functions_agent = OpenAIFunctionsAgent(llm=chat_llm, tools=toolset, prompt=agent_prompt)
    chat_memory = AgentTokenBufferMemory(memory_key=history_key, llm=chat_llm)

    return AgentExecutor(
        agent=functions_agent,
        tools=toolset,
        memory=chat_memory,
        return_intermediate_steps=True,
    )
43+
44+
45+
46+
47+

0 commit comments

Comments
 (0)