-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
286 lines (239 loc) · 10.4 KB
/
app.py
File metadata and controls
286 lines (239 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
"""
Main application for RAG-based chat system.
Provides a Streamlit UI for interacting with chat agents.
"""
import streamlit as st
import pandas as pd
import os
from typing import Dict, List, Any, Optional
import psycopg2 # Import psycopg2
from ingest import DocumentIngester
from retriever import DocumentRetriever
from chat_agent import ChatAgent, AgentManager
# Configure the Streamlit page: title, icon, and wide layout.
st.set_page_config(page_title="RAG Chat System", page_icon="🤖", layout="wide")

# Seed per-session defaults on the first run of a session.
# Each entry maps a session-state key to a zero-argument factory so that a
# default (notably AgentManager()) is only constructed when its key is missing.
# "clear_input" is the flag the chat UI uses to request an empty message box
# on the next rerun.
_SESSION_DEFAULT_FACTORIES = {
    "agent_manager": AgentManager,
    "chat_history": dict,
    "current_agent_id": lambda: None,
    "clear_input": lambda: False,
}
for _state_key, _make_default in _SESSION_DEFAULT_FACTORIES.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Function to create a new agent
def create_new_agent(url: str, agent_name: str) -> None:
    """Scrape a job posting, index it, and register a chat agent for it.

    The vector-store collection name is derived from the URL: the host part
    with dots replaced by underscores, followed by the first 8 hex characters
    of the URL's MD5 digest.

    On success the new agent becomes the current agent and gets an empty chat
    history. If scraping returns nothing, or any step raises, an error is shown
    in the UI and no agent is registered.

    Args:
        url: Address of the page to scrape.
        agent_name: Display name for the new agent.
    """
    import hashlib
    from urllib.parse import urlparse

    host_slug = urlparse(url).netloc.replace('.', '_')
    short_hash = hashlib.md5(url.encode('utf-8')).hexdigest()[:8]
    collection_name = f"{host_slug}_{short_hash}"

    with st.spinner("Creating agent... This may take a while as we process the documentation."):
        try:
            from greenhouse_job_scraper import GreenhouseJobScraper

            scraper = GreenhouseJobScraper()
            job_details = scraper.scrape_job_details(url)
            if not job_details:
                st.error("Failed to scrape job details.")
                return

            # Extract the posting's text and let the scraper save its own copy.
            job_text = scraper.get_job_text(job_details)
            scraper.save_to_txt(job_details)

            # Wrap the text in a single document with provenance metadata,
            # then chunk and embed it into this agent's collection.
            ingester = DocumentIngester()
            source_document = {
                "text": job_text,
                "metadata": {
                    "source": "greenhouse_job_scraper",
                    "url": url,
                    "title": job_details.get('title', 'Job Description'),
                },
            }
            chunks = ingester.chunk_documents([source_document])
            ingester.create_vector_store(chunks, collection_name=collection_name)

            # Register the agent and make it the active conversation.
            state = st.session_state
            agent = state.agent_manager.create_agent(url, agent_name, collection_name)
            state.current_agent_id = agent.agent_id
            state.chat_history[agent.agent_id] = []
            st.success(f"Agent '{agent_name}' created successfully!")
        except Exception as e:
            st.error(f"Error creating agent: {str(e)}")
# Function to handle sending a message
def send_message(agent_id: str, message: str) -> None:
    """Append a user message to an agent's history and record the agent's reply.

    Messages that are empty or whitespace-only are ignored. The agent is
    queried with the "show_ranking" session preference (False when unset), and
    the "answer" entry of its response is stored as the assistant turn.

    Args:
        agent_id: Identifier of the agent to talk to; must already have a
            chat-history entry.
        message: Text typed by the user.
    """
    if not message.strip():
        return

    state = st.session_state

    # Record the user's turn before querying so it is kept even if the query fails.
    state.chat_history[agent_id].append({"role": "user", "content": message})

    agent = state.agent_manager.get_agent(agent_id)
    with st.spinner("Thinking..."):
        response = agent.query(
            message,
            show_ranking=state.get("show_ranking", False),
        )
        state.chat_history[agent.agent_id].append(
            {"role": "assistant", "content": response["answer"]}
        )
# Function to switch agents
def switch_agent(agent_id: str) -> None:
    """Make ``agent_id`` the active agent, creating an empty history if it has none.

    Args:
        agent_id: Identifier of the agent to activate.
    """
    st.session_state.current_agent_id = agent_id
    # setdefault leaves an existing conversation untouched.
    st.session_state.chat_history.setdefault(agent_id, [])
# Function to delete an agent
def delete_agent(agent_id: str) -> None:
    """Delete an agent through the agent manager and clean up its session data.

    When the manager reports success, the agent's chat history is dropped and,
    if it was the active agent, the active selection is cleared. When the
    manager reports failure, only an error message is shown.

    Args:
        agent_id: Identifier of the agent to delete.
    """
    state = st.session_state

    if not state.agent_manager.delete_agent(agent_id):
        st.error("Failed to delete agent.")
        return

    # pop with a default is a no-op when the agent never had a history.
    state.chat_history.pop(agent_id, None)
    if state.current_agent_id == agent_id:
        state.current_agent_id = None
    st.success("Agent deleted successfully!")
# --- Database connection and data fetching ---
def fetch_jobs_data() -> Optional[List[tuple]]:
    """Fetch every row's ``name`` and ``id`` from the ``jobs`` table.

    The connection string is read from the ``POSTGRES_URL`` environment
    variable. Failures are reported in the Streamlit UI rather than raised.

    Returns:
        The rows returned by ``SELECT name, id FROM jobs``, or ``None`` when
        ``POSTGRES_URL`` is unset or any database error occurs.
    """
    postgres_url = os.environ.get("POSTGRES_URL")
    if not postgres_url:
        st.error("POSTGRES_URL environment variable not set.")
        return None

    try:
        conn = psycopg2.connect(postgres_url)
        try:
            # The cursor context manager closes the cursor on exit, and the
            # finally clause closes the connection, even when the query raises.
            with conn.cursor() as cur:
                cur.execute("SELECT name, id FROM jobs")
                return cur.fetchall()
        finally:
            conn.close()
    except Exception as e:
        st.error(f"Error fetching data from the database: {e}")
        return None
def fetch_skillz_data() -> Optional[List[tuple]]:
    """Fetch every row's ``skill_name`` and ``company_name`` from the ``skillz`` table.

    The connection string is read from the ``POSTGRES_URL`` environment
    variable. Failures are reported in the Streamlit UI rather than raised.

    Returns:
        The rows returned by ``SELECT skill_name, company_name FROM skillz``,
        or ``None`` when ``POSTGRES_URL`` is unset or any database error occurs.
    """
    postgres_url = os.environ.get("POSTGRES_URL")
    if not postgres_url:
        st.error("POSTGRES_URL environment variable not set.")
        return None

    try:
        conn = psycopg2.connect(postgres_url)
        try:
            # The cursor context manager closes the cursor on exit, and the
            # finally clause closes the connection, even when the query raises.
            with conn.cursor() as cur:
                cur.execute("SELECT skill_name, company_name FROM skillz")
                return cur.fetchall()
        finally:
            conn.close()
    except Exception as e:
        st.error(f"Error fetching data from the database: {e}")
        return None
def fetch_unified_data() -> Optional[pd.DataFrame]:
    """Fetch jobs left-joined with their skills as a DataFrame.

    The connection string is read from the ``POSTGRES_URL`` environment
    variable. Failures are reported in the Streamlit UI rather than raised.

    Returns:
        A DataFrame with columns ``job_id``, ``job_name``, ``skill_name`` and
        ``company_name``, or ``None`` when ``POSTGRES_URL`` is unset, the query
        returns no rows, or any error occurs.
    """
    postgres_url = os.environ.get("POSTGRES_URL")
    if not postgres_url:
        st.error("POSTGRES_URL environment variable not set.")
        return None

    # This query assumes jobs.name matches skillz.company_name; adjust the
    # JOIN condition if the schema relates the tables differently.
    # The SQL text is kept flush-left so the literal itself is unchanged.
    query = """
SELECT j.id, j.name as job_name, s.skill_name, s.company_name
FROM jobs j
LEFT JOIN skillz s ON j.name = s.company_name
ORDER BY j.name, s.skill_name
"""

    try:
        conn = psycopg2.connect(postgres_url)
        try:
            # The cursor context manager closes the cursor on exit, and the
            # finally clause closes the connection, even when the query raises.
            with conn.cursor() as cur:
                cur.execute(query)
                rows = cur.fetchall()
        finally:
            conn.close()

        if not rows:
            return None
        return pd.DataFrame(
            rows,
            columns=["job_id", "job_name", "skill_name", "company_name"],
        )
    except Exception as e:
        st.error(f"Error fetching unified data from the database: {e}")
        return None
# Main layout
st.title("RAG Chat System")

# Sidebar: ranking toggle, agent creation form, agent list, and an "About" blurb.
with st.sidebar:
    st.header("Agent Management")

    # Keep the ranking preference in session state; send_message reads it from there.
    if "show_ranking" not in st.session_state:
        st.session_state["show_ranking"] = False
    st.session_state["show_ranking"] = st.checkbox(
        "Show document ranking scores",
        value=st.session_state["show_ranking"],
    )

    with st.expander("Create New Agent", expanded=True):
        new_agent_url = st.text_input(
            "Document URL",
            placeholder="https://example.com/docs",
            key="new_agent_url",
        )
        new_agent_name = st.text_input("Agent Name", placeholder="My Documentation Agent")
        if st.button("Create Agent"):
            # Both fields are required before any scraping starts.
            if not new_agent_url or not new_agent_name:
                st.warning("Please provide both a URL and a name for the agent.")
            else:
                create_new_agent(new_agent_url, new_agent_name)

    st.subheader("Your Agents")
    agents = st.session_state.agent_manager.list_agents()
    if agents:
        # One row per agent: a wide button to select it, a narrow one to delete it.
        for entry in agents:
            select_col, delete_col = st.columns([3, 1])
            with select_col:
                if st.button(f"{entry['agent_name']}", key=f"select_{entry['agent_id']}"):
                    switch_agent(entry['agent_id'])
            with delete_col:
                if st.button("🗑️", key=f"delete_{entry['agent_id']}"):
                    delete_agent(entry['agent_id'])
    else:
        st.info("No agents created yet. Create your first agent above!")

    st.divider()
    st.markdown("### About")
    st.markdown("""
This is a prototype RAG-based chat system that allows you to:
- Create agents from documentation sources
- Chat with agents to get information from the documentation
- Manage multiple agents for different documentation sources
""")
# Main chat interface
if not st.session_state.current_agent_id:
    st.info("Select an agent from the sidebar or create a new one to start chatting.")
else:
    agent = st.session_state.agent_manager.get_agent(st.session_state.current_agent_id)
    st.header(f"Chat with {agent.agent_name}")

    # Replay the stored conversation, labelling each turn with its speaker.
    with st.container():
        for turn in st.session_state.chat_history[agent.agent_id]:
            rendered = (
                f"**You:** {turn['content']}"
                if turn["role"] == "user"
                else f"**{agent.agent_name}:** {turn['content']}"
            )
            st.markdown(rendered)

    # Input clearing: when the previous run set clear_input, offer an empty
    # default; otherwise offer whatever is currently stored for the widget.
    # NOTE(review): whether Streamlit applies `value` to a keyed widget that
    # already has session state depends on the Streamlit version - confirm.
    if st.session_state.clear_input:
        prefill = ""
    else:
        prefill = st.session_state.get("user_input", "")
    user_input = st.text_input("Your message:", key="user_input", value=prefill)

    if st.button("Send"):
        send_message(agent.agent_id, user_input)
        st.session_state.clear_input = True
        st.rerun()

    # Reached only on runs that did not just send: reset the flag after it has been used.
    if st.session_state.clear_input:
        st.session_state.clear_input = False

# Run the Streamlit app
if __name__ == "__main__":
    # Nothing to do here: Streamlit executes the script top to bottom itself.
    pass