1+ import streamlit as st
2+
3+ from dotenv import load_dotenv
4+ from langchain .text_splitter import CharacterTextSplitter
5+ from langchain .embeddings import OpenAIEmbeddings
6+ from langchain .vectorstores import FAISS
7+ from annotated_text import annotated_text ,annotation
8+ from utils .changelog import changelog_markdown
9+ from utils .session_state import *
10+ from utils .docs_parse import *
11+ from utils .callback import CustomHandler
12+ from utils .helpers import *
13+ from models .agents import agents_classes
14+ from utils .multi_modal import st_multi_modal
15+ load_dotenv ()
16+
17+
18+ import json
19+
def delete_messages():
    """Clear the current chat history and the active executor's memory.

    Best-effort: if either the message list or the executor memory is
    unavailable, a note is printed and the function returns normally.
    """
    try:
        messages_session_state().clear()
        executor_session_state().memory.clear()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt,
        # SystemExit and other BaseException-based control-flow signals.
        print('No Memory for agent')
def get_vectorstore(documents):
    """Split *documents* into chunks and index them in a FAISS vector store.

    Each chunk gets a 1-based ``doc_id`` in its metadata, and a string
    ``source`` is reduced to the part after the last backslash.

    Returns:
        The FAISS vector store, or ``False`` when splitting yields no chunks.
    """
    # NOTE(review): the separator is newline + space exactly as found in the
    # reviewed copy; confirm it is not meant to be a bare "\n".
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        separator="\n ",
    )

    docs = text_splitter.split_documents(documents=documents)
    if not docs:
        return False

    for idx, doc in enumerate(docs, start=1):
        doc.metadata['doc_id'] = idx
        source = doc.metadata.get('source')
        # Chunks without a string source are left untouched instead of
        # raising KeyError/AttributeError.
        if isinstance(source, str):
            # Split on the backslash alone: the previous separator was
            # backslash + space, which never matches a Windows path separator.
            doc.metadata['source'] = source.split("\\")[-1]

    # The embeddings client is only created once there is something to embed.
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(documents=docs, embedding=embeddings)
43+
44+
def get_conversation_chain():
    """Instantiate every agent whose required resources exist for the dataset.

    Reads ``st.session_state.data_type`` and tries to load, from
    ``dataset/<data_type>/``, the FAISS index (``vector``), the table file
    names (``tables``) and the image metadata (``images/metadata.json``).
    A resource that fails to load is set to ``False``.

    For each entry of ``agents_classes``, every name in its ``'arguments'``
    list is resolved; if all resolve to truthy values, the entry's ``'func'``
    is called with them as keyword arguments. The resulting mapping
    ``{agent_name: {"executor": ..., "messages": []}}`` is stored in
    ``st.session_state['conversation_chain']``.

    Returns:
        ``True`` if at least one agent was built, ``False`` otherwise (in
        which case the session state is not modified).
    """
    st.progress(100, text='Getting Agents')

    # NOTE: the local names below (datatype, embeddings, vectorstore, csvs,
    # images) are looked up by name through eval() further down; do not
    # rename them without updating the agents' 'arguments' lists.
    datatype = st.session_state.data_type
    embeddings = OpenAIEmbeddings()

    # Paths carry no stray spaces so they match the directories created by
    # process() ('dataset/process/vector', '.../tables', '.../images').
    try:
        vectorstore = FAISS.load_local(f"dataset/{datatype}/vector", embeddings)
    except Exception:
        vectorstore = False

    try:
        csvs = get_file_names(f"dataset/{datatype}/tables")
    except Exception:
        csvs = False

    try:
        # Context manager closes the handle; it was previously leaked.
        with open(f'dataset/{datatype}/images/metadata.json', 'r', encoding='utf-8') as metadata_file:
            images = json.load(metadata_file)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON: no image resource.
        images = False

    conversation_chain = {}
    for el in agents_classes:
        parameters = {}
        included = True
        for arg in agents_classes[el]['arguments']:
            # SECURITY NOTE(review): eval() resolves the argument name against
            # this function's locals/globals. It is acceptable only while
            # agents_classes is trusted, project-controlled configuration.
            value = eval(arg)
            if not value:
                included = False
                break
            parameters[arg] = value  # reuse the value instead of a second eval
        if included:
            conversation_chain[el] = {
                "executor": agents_classes[el]['func'](**parameters),
                "messages": [],
            }

    if not conversation_chain:
        return False
    st.session_state['conversation_chain'] = conversation_chain
    return True
def visualize(user_question):
    """Run the active executor on *user_question* and return its result.

    A fresh Streamlit container is created and handed to a ``CustomHandler``
    callback, which is passed to the callable returned by
    ``executor_session_state()`` together with ``{"input": user_question}``.
    """
    output_area = st.container()
    executor = executor_session_state()
    handler = CustomHandler(message_placeholder=output_area)
    return executor({"input": user_question}, callbacks=[handler])
87+
88+
89+
def handle_userinput(user_question):
    """Render one chat turn for *user_question* and record it in the history.

    The question is echoed in a "user" chat bubble, a user entry and an empty
    assistant entry are appended to the message list, and the agent is run
    inside an "assistant" chat bubble. If the last history entry then carries
    ``"source_documents"``, one button per source is shown.
    """
    with st.chat_message("user"):
        st.markdown(user_question)

    new_entries = (
        {"role": "user", "content": user_question},
        {"role": "assistant", "content": ""},
    )
    for entry in new_entries:
        messages_session_state().append(entry)

    with st.chat_message("assistant"):
        visualize(user_question=user_question)
        # NOTE(review): indentation was lost in the reviewed copy; the source
        # buttons are assumed to render inside the assistant bubble - confirm.
        latest = messages_session_state()[-1]
        if "source_documents" in latest:
            display_buttons_in_columns(3, latest["source_documents"])
101+
102+
103+
def process(files):
    """Parse the uploaded *files* and persist a FAISS index for them.

    Recreates ``dataset/process`` with its ``tables``, ``images`` and
    ``vector`` sub-directories, dispatches each file to a parser chosen by its
    (case-insensitive) extension, records every file name in
    ``st.session_state.files``, removes the ``temp`` directory and, if any
    chunks were produced, saves the vector store to ``dataset/process/vector``.

    Args:
        files: Uploaded file objects exposing a ``name`` attribute; ``None``
            is treated as an empty list.
    """
    files = files or []

    remove_dir('dataset/process')
    for subdir in ('tables', 'images', 'vector'):
        # exist_ok guards against remove_dir() having left the tree in place.
        os.makedirs(f'dataset/process/{subdir}', exist_ok=True)

    # NOTE: images are indexed as ordinary documents below;
    # dataset/process/images/metadata.json is not written by this function.
    documents = []
    total = len(files)
    for i, file in enumerate(files):
        st.progress(i / total, text=f'Processing {file.name} ')

        # Match extensions case-insensitively so 'REPORT.PDF' is not skipped.
        name = file.name.lower()
        parser = None
        if name.endswith('.csv'):
            # The return value is not collected into `documents`.
            # Presumably parse_csv stores the table itself - confirm.
            parse_csv(file)
        elif name.endswith('.pdf'):
            parser = parse_pdf
        elif name.endswith('.pptx'):
            parser = parse_pptx
        elif name.endswith('.links.txt'):
            # Must be tested before the more general '.txt'.
            parser = parse_links
        elif name.endswith('.txt'):
            parser = parse_txt
        elif name.endswith('.docx'):
            parser = parse_docx
        elif name.endswith(('.png', '.jpg', '.jpeg')):
            parser = parse_image
        elif name.endswith('.mp3'):
            parser = parse_audio

        if parser is not None:
            documents.extend(parser(file))

        # Every uploaded file is listed, including unsupported types.
        st.session_state.files.append(file.name)

    remove_dir('temp')

    # get_vectorstore() returns False when there is nothing to index.
    vectorstore = get_vectorstore(documents)
    if vectorstore:
        vectorstore.save_local("dataset/process/vector")
152+
def show_source(source, documents):
    """Show the excerpts of *documents* for *source* in the sidebar.

    Writes a sub-header naming the source, then each document's
    ``page_content`` wrapped in ellipses, followed by a separator line.
    """
    heading = f"Source: {source} "
    with st.sidebar:
        st.subheader(heading)
        for document in documents:
            excerpt = f"...{document.page_content} ..."
            st.write(excerpt)
            # NOTE(review): indentation was lost in the reviewed copy; the
            # separator is assumed to follow every excerpt - confirm.
            st.write('----')
159+
# Running counter used to give every source button a unique widget key.
count = 0


def display_buttons_in_columns(num_columns, values):
    """Lay out one button per key of *values* in rows of *num_columns*.

    Args:
        num_columns: Number of Streamlit columns per row.
        values: Mapping of source name to the documents for that source.
            Clicking a button calls ``show_source(source, values[source])``.
    """
    global count

    sources = list(values.keys())

    # Ceiling division: one extra row when the last row is only partly filled.
    full_rows, remainder = divmod(len(values), num_columns)
    num_rows = full_rows + (1 if remainder else 0)

    for row in range(num_rows):
        first = row * num_columns
        row_sources = sources[first:first + num_columns]
        columns = st.columns(num_columns)
        # zip() stops at the shorter sequence, leaving trailing columns empty.
        for column, source in zip(columns, row_sources):
            count += 1
            column.button(
                source,
                key=f'b{count} ',
                use_container_width=True,
                on_click=show_source,
                args=(source, values[source]),
            )
def agent_changed():
    """Selectbox ``on_change`` callback: flag that the selected agent changed."""
    st.session_state["agent_changed"] = True
180+
181+
def main():
    """Streamlit entry point for the "Chat with Anything" app.

    Before a dataset is ready (``st.session_state.processed`` is falsy) the
    page shows the uploader with a "Process" button and a "Trained Data"
    button. Either one builds the agents via ``get_conversation_chain()`` and
    reruns the script on success. Afterwards the page shows the sidebar
    (retry, uploaded files, agent selection), the chat history of the
    selected agent and the chat input.

    NOTE(review): indentation was lost in the reviewed copy of this file, so
    the nesting below is reconstructed from the statement order - confirm
    against the original, in particular what sits inside ``with st.sidebar``
    and ``if option``.
    """
    st.set_page_config(page_title="Chat with Anything",
                       page_icon=":exploding_head:")

    init_session_state()

    subheader = st.empty()
    place = st.empty()

    with place:
        annotated_text(
            annotation("Chat with Anything", background="transparent", fontSize="40px", fontWeight="bold"),
            annotation("pre-alpha", "v0.0.2", background="#afa", fontSize="18px"),
        )

    # Typo fixed in the user-facing message (was 'Avaialable').
    no_agents_message = 'No Agents Available'

    if not st.session_state.processed:
        remove_dir('output')
        remove_dir('dataset/process')
        st.subheader("Your documents")
        # "pptx" and "jpeg" added: process() already parses both extensions.
        pdf_docs = st.file_uploader(
            "Upload your Documents here and click on 'Process'",
            accept_multiple_files=True,
            type=["txt", "pdf", "png", "mp3", "docx", "csv", "jpg", "jpeg", "pptx"],
        )
        process_button = st.button("Process", use_container_width=True, type='primary')
        trained_button = st.button("Trained Data", use_container_width=True)

        if process_button:
            my_bar = st.progress(0, text="Operation in progress")
            with my_bar:
                process(pdf_docs)
                st.session_state.data_type = "process"
                agents_ready = get_conversation_chain()
            if agents_ready:
                st.session_state.processed = True
                st.experimental_rerun()
            else:
                st.session_state.data_type = None
                st.error(no_agents_message)

        if trained_button:
            st.session_state.data_type = "trained"
            # The result used to be ignored here, which marked the session as
            # processed even when no agent could be built.
            if get_conversation_chain():
                st.session_state.processed = True
                st.experimental_rerun()
            else:
                st.session_state.data_type = None
                st.error(no_agents_message)

        with st.expander("## ChangeLog"):
            st.markdown(changelog_markdown)
    else:
        with st.sidebar:
            if st.button('Retry', type="primary", use_container_width=True):
                reset_session_state()

            with st.expander("Uploaded Files"):
                st.write(', '.join(st.session_state.files))

            option = st.selectbox(
                "Select an Agent",
                st.session_state.conversation_chain.keys(),
                placeholder="Select Your Agent",
                on_change=agent_changed,
            )

        if option:
            # SECURITY NOTE(review): eval() executes the agent's 'annotated'
            # snippet. It is acceptable only while agents_classes is trusted,
            # project-controlled configuration.
            eval(agents_classes[option]["annotated"])
            if st.session_state.agent != option:
                change_agent_session_state(option)

            # Replace the landing title with the agent name and a button
            # that clears the conversation.
            with place:
                col1, col2 = st.columns([11, 1])
                with col1:
                    annotated_text(
                        annotation(f"{option} ", background="transparent", fontSize="28px", fontWeight="bold"),
                    )
                with col2:
                    st.button('↺', type="primary", use_container_width=True, on_click=delete_messages)
            with subheader:
                pass

            # Replay the stored conversation of the selected agent.
            for message in messages_session_state():
                with st.chat_message(message["role"]):
                    placeholder = st.container()
                    st_multi_modal(placeholder, message["content"], [])

                    if "source_documents" in message:
                        display_buttons_in_columns(3, message["source_documents"])

            user_question = st.chat_input("Ask a question about your documents:")

            if user_question:
                handle_userinput(user_question)
273+
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()
276+
277+
278+
279+ #"show in a histogram sbp as a function of age with 10 years bins"
0 commit comments