from agno.embedder.openai import OpenAIEmbedder
import tempfile
import os
+from agno.document.chunking.document import DocumentChunking

-#initializing the session state variables
def init_session_state():
    """Initialize session state variables"""
    if 'openai_api_key' not in st.session_state:
@@ -23,55 +23,87 @@ def init_session_state():
        st.session_state.legal_team = None
    if 'knowledge_base' not in st.session_state:
        st.session_state.knowledge_base = None
+    # Add a new state variable to track processed files
+    if 'processed_files' not in st.session_state:
+        st.session_state.processed_files = set()
+
+COLLECTION_NAME = "legal_documents"  # Define your collection name

def init_qdrant():
-    """Initialize Qdrant vector database"""
-    if not st.session_state.qdrant_api_key:
-        raise ValueError("Qdrant API key not provided")
-    if not st.session_state.qdrant_url:
-        raise ValueError("Qdrant URL not provided")
-
-    return Qdrant(
-        collection="legal_knowledge",
-        url=st.session_state.qdrant_url,
-        api_key=st.session_state.qdrant_api_key,
-        https=True,
-        timeout=None,
-        distance="cosine"
-    )
+    """Initialize Qdrant client with configured settings."""
+    if not all([st.session_state.qdrant_api_key, st.session_state.qdrant_url]):
+        return None
+    try:
+        # Create Agno's Qdrant instance which implements VectorDb
+        vector_db = Qdrant(
+            collection=COLLECTION_NAME,
+            url=st.session_state.qdrant_url,
+            api_key=st.session_state.qdrant_api_key,
+            embedder=OpenAIEmbedder(
+                id="text-embedding-3-small",
+                api_key=st.session_state.openai_api_key
+            )
+        )
+        return vector_db
+    except Exception as e:
+        st.error(f"🔴 Qdrant connection failed: {str(e)}")
+        return None

def process_document(uploaded_file, vector_db: Qdrant):
-    """Process document, create embeddings and store in Qdrant vector database"""
+    """
+    Process document, create embeddings and store in Qdrant vector database
+
+    Args:
+        uploaded_file: Streamlit uploaded file object
+        vector_db (Qdrant): Initialized Qdrant instance from Agno
+
+    Returns:
+        PDFKnowledgeBase: Initialized knowledge base with processed documents
+    """
    if not st.session_state.openai_api_key:
        raise ValueError("OpenAI API key not provided")

    os.environ['OPENAI_API_KEY'] = st.session_state.openai_api_key

-    with tempfile.TemporaryDirectory() as temp_dir:
-
-        temp_file_path = os.path.join(temp_dir, uploaded_file.name)
-        with open(temp_file_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-
-        try:
-
-            embedder = OpenAIEmbedder(
-                model="text-embedding-3-small",
-                api_key=st.session_state.openai_api_key
+    try:
+        # Save the uploaded file to a temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
+            temp_file.write(uploaded_file.getvalue())
+            temp_file_path = temp_file.name
+
+        st.info("Loading and processing document...")
+
+        # Create a PDFKnowledgeBase with the vector_db
+        knowledge_base = PDFKnowledgeBase(
+            path=temp_file_path,  # Single string path, not a list
+            vector_db=vector_db,
+            reader=PDFReader(),
+            chunking_strategy=DocumentChunking(
+                chunk_size=1000,
+                overlap=200
            )
+        )
+
+        # Load the documents into the knowledge base
+        with st.spinner('📤 Loading documents into knowledge base...'):
+            try:
+                knowledge_base.load(recreate=True, upsert=True)
+                st.success("✅ Documents stored successfully!")
+            except Exception as e:
+                st.error(f"Error loading documents: {str(e)}")
+                raise
+
+        # Clean up the temporary file
+        try:
+            os.unlink(temp_file_path)
+        except Exception:
+            pass

-            # Creating knowledge base with explicit Qdrant configuration
-            knowledge_base = PDFKnowledgeBase(
-                path=temp_dir,
-                vector_db=vector_db,
-                reader=PDFReader(chunk=True),
-                embedder=embedder,
-                recreate_vector_db=True
-            )
-            knowledge_base.load()
-            return knowledge_base
-        except Exception as e:
-            raise Exception(f"Error processing document: {str(e)}")
+        return knowledge_base
+
+    except Exception as e:
+        st.error(f"Document processing error: {str(e)}")
+        raise Exception(f"Error processing document: {str(e)}")

def main():
    st.set_page_config(page_title="Legal Document Analyzer", layout="wide")
@@ -102,7 +134,7 @@ def main():

        qdrant_url = st.text_input(
            "Qdrant URL",
-            value=st.session_state.qdrant_url if st.session_state.qdrant_url else "https://f499085c-b4bf-4bda-a9a5-227f62a9ca20.us-west-2-0.aws.cloud.qdrant.io:6333",
+            value=st.session_state.qdrant_url if st.session_state.qdrant_url else "",
            help="Enter your Qdrant instance URL"
        )
        if qdrant_url:
@@ -111,8 +143,10 @@ def main():
        if all([st.session_state.qdrant_api_key, st.session_state.qdrant_url]):
            try:
                if not st.session_state.vector_db:
+                    # Make sure we're initializing a QdrantClient here
                    st.session_state.vector_db = init_qdrant()
-                    st.success("Successfully connected to Qdrant!")
+                    if st.session_state.vector_db:
+                        st.success("Successfully connected to Qdrant!")
            except Exception as e:
                st.error(f"Failed to connect to Qdrant: {str(e)}")

@@ -123,80 +157,90 @@ def main():
        uploaded_file = st.file_uploader("Upload Legal Document", type=['pdf'])

        if uploaded_file:
-            with st.spinner("Processing document..."):
-                try:
-                    knowledge_base = process_document(uploaded_file, st.session_state.vector_db)
-                    st.session_state.knowledge_base = knowledge_base
-
-                    # Initialize agents
-                    legal_researcher = Agent(
-                        name="Legal Researcher",
-                        role="Legal research specialist",
-                        model=OpenAIChat(model="gpt-4o"),
-                        tools=[DuckDuckGoTools()],
-                        knowledge=st.session_state.knowledge_base,
-                        search_knowledge=True,
-                        instructions=[
-                            "Find and cite relevant legal cases and precedents",
-                            "Provide detailed research summaries with sources",
-                            "Reference specific sections from the uploaded document",
-                            "Always search the knowledge base for relevant information"
-                        ],
-                        show_tool_calls=True,
-                        markdown=True
-                    )
+            # Check if this file has already been processed
+            if uploaded_file.name not in st.session_state.processed_files:
+                with st.spinner("Processing document..."):
+                    try:
+                        # Process the document and get the knowledge base
+                        knowledge_base = process_document(uploaded_file, st.session_state.vector_db)
+
+                        if knowledge_base:
+                            st.session_state.knowledge_base = knowledge_base
+                            # Add the file to processed files
+                            st.session_state.processed_files.add(uploaded_file.name)
+
+                            # Initialize agents
+                            legal_researcher = Agent(
+                                name="Legal Researcher",
+                                role="Legal research specialist",
+                                model=OpenAIChat(id="gpt-4o"),
+                                tools=[DuckDuckGoTools()],
+                                knowledge=st.session_state.knowledge_base,
+                                search_knowledge=True,
+                                instructions=[
+                                    "Find and cite relevant legal cases and precedents",
+                                    "Provide detailed research summaries with sources",
+                                    "Reference specific sections from the uploaded document",
+                                    "Always search the knowledge base for relevant information"
+                                ],
+                                show_tool_calls=True,
+                                markdown=True
+                            )

-                    contract_analyst = Agent(
-                        name="Contract Analyst",
-                        role="Contract analysis specialist",
-                        model=OpenAIChat(model="gpt-4o"),
-                        knowledge=knowledge_base,
-                        search_knowledge=True,
-                        instructions=[
-                            "Review contracts thoroughly",
-                            "Identify key terms and potential issues",
-                            "Reference specific clauses from the document"
-                        ],
-                        markdown=True
-                    )
+                            contract_analyst = Agent(
+                                name="Contract Analyst",
+                                role="Contract analysis specialist",
+                                model=OpenAIChat(id="gpt-4o"),
+                                knowledge=st.session_state.knowledge_base,
+                                search_knowledge=True,
+                                instructions=[
+                                    "Review contracts thoroughly",
+                                    "Identify key terms and potential issues",
+                                    "Reference specific clauses from the document"
+                                ],
+                                markdown=True
+                            )

-                    legal_strategist = Agent(
-                        name="Legal Strategist",
-                        role="Legal strategy specialist",
-                        model=OpenAIChat(model="gpt-4o"),
-                        knowledge=knowledge_base,
-                        search_knowledge=True,
-                        instructions=[
-                            "Develop comprehensive legal strategies",
-                            "Provide actionable recommendations",
-                            "Consider both risks and opportunities"
-                        ],
-                        markdown=True
-                    )
+                            legal_strategist = Agent(
+                                name="Legal Strategist",
+                                role="Legal strategy specialist",
+                                model=OpenAIChat(id="gpt-4o"),
+                                knowledge=st.session_state.knowledge_base,
+                                search_knowledge=True,
+                                instructions=[
+                                    "Develop comprehensive legal strategies",
+                                    "Provide actionable recommendations",
+                                    "Consider both risks and opportunities"
+                                ],
+                                markdown=True
+                            )

-                    # Legal Agent Team
-                    st.session_state.legal_team = Agent(
-                        name="Legal Team Lead",
-                        role="Legal team coordinator",
-                        model=OpenAIChat(model="gpt-4o"),
-                        team=[legal_researcher, contract_analyst, legal_strategist],
-                        knowledge=st.session_state.knowledge_base,
-                        search_knowledge=True,
-                        instructions=[
-                            "Coordinate analysis between team members",
-                            "Provide comprehensive responses",
-                            "Ensure all recommendations are properly sourced",
-                            "Reference specific parts of the uploaded document",
-                            "Always search the knowledge base before delegating tasks"
-                        ],
-                        show_tool_calls=True,
-                        markdown=True
-                    )
-
-                    st.success("✅ Document processed and team initialized!")
-
-                except Exception as e:
-                    st.error(f"Error processing document: {str(e)}")
+                            # Legal Agent Team
+                            st.session_state.legal_team = Agent(
+                                name="Legal Team Lead",
+                                role="Legal team coordinator",
+                                model=OpenAIChat(id="gpt-4o"),
+                                team=[legal_researcher, contract_analyst, legal_strategist],
+                                knowledge=st.session_state.knowledge_base,
+                                search_knowledge=True,
+                                instructions=[
+                                    "Coordinate analysis between team members",
+                                    "Provide comprehensive responses",
+                                    "Ensure all recommendations are properly sourced",
+                                    "Reference specific parts of the uploaded document",
+                                    "Always search the knowledge base before delegating tasks"
+                                ],
+                                show_tool_calls=True,
+                                markdown=True
+                            )
+
+                            st.success("✅ Document processed and team initialized!")
+
+                    except Exception as e:
+                        st.error(f"Error processing document: {str(e)}")
+            else:
+                # File already processed, just show a message
+                st.success("✅ Document already processed and team ready!")

    st.divider()
    st.header("🔍 Analysis Options")