Commit 5b13be4
RAG update
1 parent 76788b4

2 files changed: 83 additions & 58 deletions


app.py

Lines changed: 80 additions & 55 deletions
@@ -53,10 +53,10 @@ def init_session_state(self):
             "selected_model": None,
             "use_rag": True,  # Enable RAG by default
             "rag_config": {
-                "chunk_size": 512,
-                "chunk_overlap": 50,
+                "chunk_size": 128,
+                "chunk_overlap": 25,
                 "similarity_threshold": 0.7,
-                "top_k": 5,
+                "top_k": 10,
                 "embedding_model": "nomic-embed-text",
                 "llm_model": None  # Will be set to selected model
             }
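The new defaults trade a few large chunks for many small ones while retrieving twice as many per query. A rough back-of-the-envelope sketch, assuming a sliding-window splitter whose stride is chunk_size minus chunk_overlap (the helper below is illustrative, not part of the app):

# Back-of-the-envelope comparison of the old and new chunking defaults (illustrative only).
# Assumes a sliding-window splitter with stride = chunk_size - chunk_overlap.
def chunk_stats(doc_tokens, chunk_size, chunk_overlap, top_k):
    stride = chunk_size - chunk_overlap
    num_chunks = max(1, -(-(doc_tokens - chunk_overlap) // stride))  # ceiling division
    retrieved_tokens = min(top_k, num_chunks) * chunk_size
    return num_chunks, retrieved_tokens

print(chunk_stats(10_000, 512, 50, 5))   # old defaults -> (22, 2560)
print(chunk_stats(10_000, 128, 25, 10))  # new defaults -> (97, 1280)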
@@ -68,7 +68,7 @@ def init_session_state(self):
 
         # Create first chat if none exist
         if not st.session_state.chats:
-            self.create_new_chat()
+            self.create_new_chat(clear_rag=False)  # Don't clear RAG on initialization
 
     def init_rag_system(self):
         """Initialize the RAG system"""
@@ -118,8 +118,8 @@ def init_rag_system(self):
             logger.error(f"Failed to initialize RAG system: {e}")
             st.session_state.rag_system = None
 
-    def create_new_chat(self):
-        """Create a new chat session with clean vector store"""
+    def create_new_chat(self, clear_rag=False):
+        """Create a new chat session with optional vector store clearing"""
         chat_id = str(uuid.uuid4())
         st.session_state.chats[chat_id] = {
             "messages": [],
@@ -133,8 +133,8 @@ def create_new_chat(self):
         }
         st.session_state.current_chat_id = chat_id
 
-        # Clear the RAG system for fresh start
-        if st.session_state.get("rag_system"):
+        # Only clear the RAG system if explicitly requested
+        if clear_rag and st.session_state.get("rag_system"):
             try:
                 st.session_state.rag_system.clear_all_documents()
                 logger.info(f"Cleared RAG system for new chat: {chat_id}")
@@ -168,7 +168,7 @@ def delete_chat(self, chat_id):
         if st.session_state.chats:
             st.session_state.current_chat_id = list(st.session_state.chats.keys())[0]
         else:
-            self.create_new_chat()
+            self.create_new_chat(clear_rag=False)  # Don't clear RAG when auto-creating after deletion
 
 class PDFProcessor:
     """Simplified PDF processing"""
@@ -475,7 +475,7 @@ def render_sidebar(chat_manager):
 
     # New chat button
     if st.button("New Chat", use_container_width=True, type="primary"):
-        chat_manager.create_new_chat()
+        chat_manager.create_new_chat(clear_rag=True)  # Clear RAG for explicit new chat
         st.rerun()
 
     st.divider()
@@ -620,7 +620,7 @@ def render_document_upload(chat_manager):
     with col2:
         if st.button("🗑️ Clear Upload", help="Clear file upload state if stuck"):
             # Force clear the uploader by creating a new chat
-            chat_manager.create_new_chat()
+            chat_manager.create_new_chat(clear_rag=True)  # Clear RAG when clearing upload issues
             st.rerun()
 
     # Use a unique key per chat to avoid file state conflicts
@@ -857,11 +857,20 @@ def render_chat_interface(chat_manager):
         # Generate AI response
         with st.chat_message("assistant"):
             try:
+                # Debug: Log the RAG decision logic
+                logger.info(f"RAG Decision Debug:")
+                logger.info(f"  use_rag: {st.session_state.use_rag}")
+                logger.info(f"  rag_system exists: {st.session_state.rag_system is not None}")
+                logger.info(f"  rag_processed: {chat.get('rag_processed')}")
+                logger.info(f"  rag_system.index exists: {getattr(st.session_state.rag_system, 'index', None) is not None if st.session_state.rag_system else False}")
+
                 # Try RAG system first if available and document is processed
                 if (st.session_state.use_rag and
                     st.session_state.rag_system and
-                    chat.get("rag_processed")):
+                    chat.get("rag_processed") and
+                    getattr(st.session_state.rag_system, 'index', None) is not None):
                     try:
+                        logger.info("Using RAG system for response generation")
                         response = generate_ai_response_rag(prompt, chat)
                         chat_manager.add_message("assistant", response)
 
@@ -883,6 +892,16 @@ def render_chat_interface(chat_manager):
                         show_citations(response, chat, prompt)
                 else:
                     # Use traditional method
+                    logger.info("Using traditional method for response generation")
+                    if not st.session_state.use_rag:
+                        logger.info("  Reason: RAG disabled")
+                    elif not st.session_state.rag_system:
+                        logger.info("  Reason: RAG system not available")
+                    elif not chat.get("rag_processed"):
+                        logger.info("  Reason: Document not RAG processed")
+                    elif getattr(st.session_state.rag_system, 'index', None) is None:
+                        logger.info("  Reason: RAG system has no index")
+
                     response = generate_ai_response(prompt, chat["document_text"])
                     # Note: Display is handled within generate_ai_response for reasoning support
                     chat_manager.add_message("assistant", response)
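The choice between the RAG path and the full-document fallback now rests on four conditions, and the fallback branch logs which one failed. The same gate, extracted as a pure helper for illustration (hypothetical function, not part of the diff):

# Hypothetical helper mirroring the four-way gate above: use RAG only when it is enabled,
# the system exists, the chat's document was processed, and the vector index is present.
def should_use_rag(use_rag, rag_system, chat):
    return bool(
        use_rag
        and rag_system is not None
        and chat.get("rag_processed")
        and getattr(rag_system, "index", None) is not None
    )

class _FakeRag:
    index = object()

print(should_use_rag(True, _FakeRag(), {"rag_processed": True}))   # True  -> RAG path
print(should_use_rag(True, _FakeRag(), {"rag_processed": False}))  # False -> full-document fallback
print(should_use_rag(True, None, {"rag_processed": True}))         # False -> system not available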
@@ -895,6 +914,8 @@ def render_chat_interface(chat_manager):
                         st.info("📄 Response generated using full document (RAG disabled)")
                     elif not chat.get("rag_processed"):
                         st.info("📄 Response generated using full document (RAG not processed)")
+                    elif getattr(st.session_state.rag_system, 'index', None) is None:
+                        st.info("📄 Response generated using full document (RAG system has no index)")
 
             except Exception as e:
                 st.error(f"Error generating response: {e}")
@@ -924,43 +945,47 @@ def generate_ai_response_rag(prompt, chat_data):
         logger.warning(f"Could not get unfiltered chunks: {e}")
         all_nodes = []
 
-    # Display retrieved chunks information
-    if retrieval_info:
-        # Show chunks that passed the similarity threshold
-        with st.expander(f"📚 Retrieved {len(retrieval_info)} relevant chunks", expanded=False):
-            for chunk in retrieval_info:
-                st.markdown(f"**Chunk {chunk['chunk_id']}** (Score: {chunk['score']:.3f})")
-                st.text_area(
-                    f"Content {chunk['chunk_id']} ({len(chunk['text'])} characters):",
-                    value=chunk['text'],
-                    height=200,
-                    disabled=True,
-                    key=f"chunk_{chunk['chunk_id']}_{hash(prompt)}"
-                )
-                st.markdown("---")
-        context_text = "\n\n".join([chunk['text'] for chunk in retrieval_info])
-    elif all_nodes:
-        # Show chunks that were retrieved but didn't pass threshold
-        st.warning(f"⚠️ **Similarity threshold too high**: Using top {min(3, len(all_nodes))} chunks with lower scores")
-        with st.expander(f"📚 Using {min(3, len(all_nodes))} chunks (below threshold)", expanded=False):
-            for i, node in enumerate(all_nodes[:3]):
-                score = getattr(node, 'score', 0.0)
-                st.markdown(f"**Chunk {i+1}** (Score: {score:.3f}) - Below threshold ({st.session_state.rag_config['similarity_threshold']})")
-                st.text_area(
-                    f"Content {i+1} ({len(node.text)} characters):",
-                    value=node.text,
-                    height=200,
-                    disabled=True,
-                    key=f"fallback_chunk_{i}_{hash(prompt)}"
-                )
-                st.markdown("---")
-        context_text = "\n\n".join([node.text for node in all_nodes[:3]])
-    else:
-        st.error("❌ No chunks retrieved - this shouldn't happen with a processed document")
-        context_text = ""
-
+    # Display retrieved chunks information - create persistent container
+    chunks_container = st.container()
+    with chunks_container:
+        if retrieval_info:
+            # Show chunks that passed the similarity threshold
+            with st.expander(f"📚 Retrieved {len(retrieval_info)} relevant chunks", expanded=False):
+                for chunk in retrieval_info:
+                    st.markdown(f"**Chunk {chunk['chunk_id']}** (Score: {chunk['score']:.3f})")
+                    st.text_area(
+                        f"Content {chunk['chunk_id']} ({len(chunk['text'])} characters):",
+                        value=chunk['text'],
+                        height=200,
+                        disabled=True,
+                        key=f"chunk_{chunk['chunk_id']}_{hash(prompt)}"
+                    )
+                    st.markdown("---")
+            context_text = "\n\n".join([chunk['text'] for chunk in retrieval_info])
+        elif all_nodes:
+            # Show chunks that were retrieved but didn't pass threshold
+            st.warning(f"⚠️ **Similarity threshold too high**: Using top {min(3, len(all_nodes))} chunks with lower scores")
+            with st.expander(f"📚 Using {min(3, len(all_nodes))} chunks (below threshold)", expanded=False):
+                for i, node in enumerate(all_nodes[:3]):
+                    score = getattr(node, 'score', 0.0)
+                    st.markdown(f"**Chunk {i+1}** (Score: {score:.3f}) - Below threshold ({st.session_state.rag_config['similarity_threshold']})")
+                    st.text_area(
+                        f"Content {i+1} ({len(node.text)} characters):",
+                        value=node.text,
+                        height=200,
+                        disabled=True,
+                        key=f"fallback_chunk_{i}_{hash(prompt)}"
+                    )
+                    st.markdown("---")
+            context_text = "\n\n".join([node.text for node in all_nodes[:3]])
+        else:
+            st.error("❌ No chunks retrieved - this shouldn't happen with a processed document")
+            context_text = ""
+
+    # Create persistent containers for reasoning and answer
+    reasoning_container = st.container()
+    answer_container = st.container()
 
-
     # Create system prompt with retrieved context
     system_prompt = f"""You are a document analysis assistant. Answer questions ONLY using information from these relevant document excerpts:
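Moving the chunk display, reasoning, and answer into st.container() slots pins each element to a fixed position on the page; the placeholders created inside them can then be rewritten on every streamed token without the layout jumping around. A minimal sketch of that container/placeholder pattern (simplified, not the app's actual code):

# Minimal sketch of the container/placeholder layout (simplified; run with `streamlit run`).
import time
import streamlit as st

chunks_container = st.container()     # fixed slot for retrieval details
reasoning_container = st.container()  # fixed slot for the reasoning expander
answer_container = st.container()     # fixed slot for the streamed answer

with chunks_container:
    with st.expander("Retrieved chunks", expanded=False):
        st.write("chunk previews would go here")

reasoning_placeholder = reasoning_container.empty()
answer_placeholder = answer_container.empty()

streamed = ""
for token in ["Hello", ", ", "world", "!"]:
    streamed += token
    answer_placeholder.markdown(streamed)  # rewrites the same slot instead of appending below it
    time.sleep(0.1)

with reasoning_placeholder.container():
    with st.expander("Reasoning", expanded=False):
        st.markdown("reasoning text would go here")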
@@ -1017,9 +1042,9 @@ def generate_ai_response_rag(prompt, chat_data):
         in_reasoning = False
         reasoning_started = False
 
-        # Create containers for dynamic updates
-        reasoning_placeholder = st.empty()
-        answer_placeholder = st.empty()
+        # Create placeholders within the persistent containers
+        reasoning_placeholder = reasoning_container.empty()
+        answer_placeholder = answer_container.empty()
 
         try:
             if is_running_in_docker():
@@ -1058,20 +1083,20 @@ def generate_ai_response_rag(prompt, chat_data):
                         answer_content = full_response[think_end + 8:].strip()
                         in_reasoning = False
 
-                        # Show completed reasoning in expandable container
+                        # Show completed reasoning in persistent container
                         with reasoning_placeholder.container():
                             with st.expander("🤔 Reasoning", expanded=False):
                                 st.markdown(reasoning_content)
 
-                        # Show the actual answer
+                        # Show the actual answer in persistent container
                         if answer_content:
                             answer_placeholder.markdown(answer_content)
                     else:
                         # Still in reasoning phase
                         in_reasoning = True
                         current_reasoning = full_response[think_start + 7:].strip()
 
-                        # Show reasoning with spinner or content
+                        # Show reasoning with spinner or content in persistent container
                         with reasoning_placeholder.container():
                             with st.expander("🤔 Reasoning", expanded=False):
                                 if current_reasoning:
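The reasoning display depends on splitting the streamed text at <think>...</think> markers; the +7 and +8 offsets in the code are simply len("<think>") and len("</think>"). A stand-alone sketch of that split (illustrative helper, not the app's exact implementation):

# Stand-alone sketch of the reasoning/answer split driven by <think> tags (illustrative).
def split_reasoning(full_response):
    think_start = full_response.find("<think>")
    if think_start == -1:
        return None, full_response                       # no reasoning tags: all answer
    think_end = full_response.find("</think>")
    if think_end == -1:                                   # still inside the reasoning phase
        return full_response[think_start + 7:].strip(), ""
    reasoning = full_response[think_start + 7:think_end].strip()  # 7 == len("<think>")
    answer = full_response[think_end + 8:].strip()                # 8 == len("</think>")
    return reasoning, answer

print(split_reasoning("<think>compare both clauses</think>The answer is B."))
# -> ('compare both clauses', 'The answer is B.')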
@@ -1080,7 +1105,7 @@ def generate_ai_response_rag(prompt, chat_data):
                                     with st.spinner("Thinking..."):
                                         st.empty()
                 else:
-                    # No reasoning tags detected, stream normally
+                    # No reasoning tags detected, stream normally in persistent container
                     answer_content = full_response
                     answer_placeholder.markdown(answer_content)
 
ragnarok/rag_system.py

Lines changed: 3 additions & 3 deletions
@@ -48,10 +48,10 @@ def __init__(
         ollama_base_url: str = "http://localhost:11434",
         embedding_model: str = "nomic-embed-text",
         llm_model: str = "llama3.1:8b",
-        chunk_size: int = 512,
-        chunk_overlap: int = 50,
+        chunk_size: int = 128,
+        chunk_overlap: int = 25,
         similarity_threshold: float = 0.7,
-        top_k: int = 5
+        top_k: int = 10
     ):
         """
         Initialize the RAG system
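With the new defaults the system indexes much finer-grained chunks and retrieves more of them per query. A hedged construction example: the class name RAGSystem and the import path are assumptions, while the keyword arguments mirror the signature above.

# Hypothetical usage; the class name and import path are assumptions,
# the keyword arguments mirror the __init__ signature in the diff above.
from ragnarok.rag_system import RAGSystem  # assumed export name

rag = RAGSystem(
    ollama_base_url="http://localhost:11434",
    embedding_model="nomic-embed-text",
    llm_model="llama3.1:8b",
    chunk_size=128,            # was 512: finer-grained chunks
    chunk_overlap=25,          # was 50: overlap scaled down with the chunk size
    similarity_threshold=0.7,
    top_k=10,                  # was 5: retrieve more of the smaller chunks
)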
