@@ -53,10 +53,10 @@ def init_session_state(self):
             "selected_model": None,
             "use_rag": True,  # Enable RAG by default
             "rag_config": {
-                "chunk_size": 512,
-                "chunk_overlap": 50,
+                "chunk_size": 128,
+                "chunk_overlap": 25,
                 "similarity_threshold": 0.7,
-                "top_k": 5,
+                "top_k": 10,
                 "embedding_model": "nomic-embed-text",
                 "llm_model": None  # Will be set to selected model
             }
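Reviewer note on the new defaults: smaller chunks with a higher top_k make retrieval more granular while actually shrinking the worst-case context handed to the LLM. A standalone back-of-the-envelope sketch (it assumes chunk_size and chunk_overlap are token counts, as in LlamaIndex-style sentence splitters; that assumption is not stated in this diff):

```python
# Standalone sketch: compare the retrieval context budget implied by the old and
# new defaults. Assumes chunk_size is a token count and every retrieved chunk is
# full-sized, which is the worst case.
old = {"chunk_size": 512, "chunk_overlap": 50, "top_k": 5}
new = {"chunk_size": 128, "chunk_overlap": 25, "top_k": 10}

def max_context_tokens(cfg: dict) -> int:
    # Upper bound on tokens passed to the LLM as retrieved context.
    return cfg["chunk_size"] * cfg["top_k"]

print(max_context_tokens(old))  # 2560 tokens across 5 coarse chunks
print(max_context_tokens(new))  # 1280 tokens across 10 finer-grained chunks
```

The overlap ratio also roughly doubles (50/512 is about 10%, 25/128 about 20%), which helps keep sentences intact across the much smaller chunk boundaries.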
@@ -68,7 +68,7 @@ def init_session_state(self):
 
         # Create first chat if none exist
         if not st.session_state.chats:
-            self.create_new_chat()
+            self.create_new_chat(clear_rag=False)  # Don't clear RAG on initialization
 
     def init_rag_system(self):
         """Initialize the RAG system"""
@@ -118,8 +118,8 @@ def init_rag_system(self):
             logger.error(f"Failed to initialize RAG system: {e}")
             st.session_state.rag_system = None
 
-    def create_new_chat(self):
-        """Create a new chat session with clean vector store"""
+    def create_new_chat(self, clear_rag=False):
+        """Create a new chat session with optional vector store clearing"""
         chat_id = str(uuid.uuid4())
         st.session_state.chats[chat_id] = {
             "messages": [],
@@ -133,8 +133,8 @@ def create_new_chat(self):
         }
         st.session_state.current_chat_id = chat_id
 
-        # Clear the RAG system for fresh start
-        if st.session_state.get("rag_system"):
+        # Only clear the RAG system if explicitly requested
+        if clear_rag and st.session_state.get("rag_system"):
             try:
                 st.session_state.rag_system.clear_all_documents()
                 logger.info(f"Cleared RAG system for new chat: {chat_id}")
@@ -168,7 +168,7 @@ def delete_chat(self, chat_id):
         if st.session_state.chats:
             st.session_state.current_chat_id = list(st.session_state.chats.keys())[0]
         else:
-            self.create_new_chat()
+            self.create_new_chat(clear_rag=False)  # Don't clear RAG when auto-creating after deletion
 
 class PDFProcessor:
     """Simplified PDF processing"""
@@ -475,7 +475,7 @@ def render_sidebar(chat_manager):
 
     # New chat button
     if st.button("New Chat", use_container_width=True, type="primary"):
-        chat_manager.create_new_chat()
+        chat_manager.create_new_chat(clear_rag=True)  # Clear RAG for explicit new chat
         st.rerun()
 
     st.divider()
@@ -620,7 +620,7 @@ def render_document_upload(chat_manager):
     with col2:
         if st.button("🗑️ Clear Upload", help="Clear file upload state if stuck"):
             # Force clear the uploader by creating a new chat
-            chat_manager.create_new_chat()
+            chat_manager.create_new_chat(clear_rag=True)  # Clear RAG when clearing upload issues
             st.rerun()
 
     # Use a unique key per chat to avoid file state conflicts
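Taken together, the call sites now encode a policy: chats created implicitly (at startup, or after deleting the last chat) keep the existing vector store, while explicit user actions ("New Chat", "Clear Upload") wipe it. A hypothetical stub-based check of that guard; the trimmed-down function and the MagicMock stand-in are illustrative, not code from this repo:

```python
from unittest.mock import MagicMock

rag = MagicMock()

def create_new_chat(clear_rag=False, rag_system=rag):
    # Mirrors the guard added above: clearing the store is opt-in.
    if clear_rag and rag_system:
        rag_system.clear_all_documents()

create_new_chat()                 # init / post-delete path: store untouched
create_new_chat(clear_rag=True)   # "New Chat" / "Clear Upload": store wiped
assert rag.clear_all_documents.call_count == 1
```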
@@ -857,11 +857,20 @@ def render_chat_interface(chat_manager):
         # Generate AI response
         with st.chat_message("assistant"):
             try:
+                # Debug: Log the RAG decision logic
+                logger.info(f"RAG Decision Debug:")
+                logger.info(f"  use_rag: {st.session_state.use_rag}")
+                logger.info(f"  rag_system exists: {st.session_state.rag_system is not None}")
+                logger.info(f"  rag_processed: {chat.get('rag_processed')}")
+                logger.info(f"  rag_system.index exists: {getattr(st.session_state.rag_system, 'index', None) is not None if st.session_state.rag_system else False}")
+
                 # Try RAG system first if available and document is processed
                 if (st.session_state.use_rag and
                     st.session_state.rag_system and
-                    chat.get("rag_processed")):
+                    chat.get("rag_processed") and
+                    getattr(st.session_state.rag_system, 'index', None) is not None):
                     try:
+                        logger.info("Using RAG system for response generation")
                         response = generate_ai_response_rag(prompt, chat)
                         chat_manager.add_message("assistant", response)
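The branch condition has grown to four clauses that must stay in sync with the debug logging above it and with the fallback-reason logging in the next hunk. One possible follow-up, sketched here only, is to fold the checks into a single helper so the condition and the logs cannot drift apart (the helper name is hypothetical):

```python
def rag_is_ready(session_state, chat) -> bool:
    # Single source of truth for "can this question be answered from the vector index?"
    return bool(
        session_state.use_rag
        and session_state.rag_system
        and chat.get("rag_processed")
        and getattr(session_state.rag_system, "index", None) is not None
    )
```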
@@ -883,6 +892,16 @@ def render_chat_interface(chat_manager):
                         show_citations(response, chat, prompt)
                 else:
                     # Use traditional method
+                    logger.info("Using traditional method for response generation")
+                    if not st.session_state.use_rag:
+                        logger.info("  Reason: RAG disabled")
+                    elif not st.session_state.rag_system:
+                        logger.info("  Reason: RAG system not available")
+                    elif not chat.get("rag_processed"):
+                        logger.info("  Reason: Document not RAG processed")
+                    elif getattr(st.session_state.rag_system, 'index', None) is None:
+                        logger.info("  Reason: RAG system has no index")
+
                     response = generate_ai_response(prompt, chat["document_text"])
                     # Note: Display is handled within generate_ai_response for reasoning support
                     chat_manager.add_message("assistant", response)
@@ -895,6 +914,8 @@ def render_chat_interface(chat_manager):
                         st.info("📄 Response generated using full document (RAG disabled)")
                     elif not chat.get("rag_processed"):
                         st.info("📄 Response generated using full document (RAG not processed)")
+                    elif getattr(st.session_state.rag_system, 'index', None) is None:
+                        st.info("📄 Response generated using full document (RAG system has no index)")
 
             except Exception as e:
                 st.error(f"Error generating response: {e}")
@@ -924,43 +945,47 @@ def generate_ai_response_rag(prompt, chat_data):
         logger.warning(f"Could not get unfiltered chunks: {e}")
         all_nodes = []
 
-    # Display retrieved chunks information
-    if retrieval_info:
-        # Show chunks that passed the similarity threshold
-        with st.expander(f"📚 Retrieved {len(retrieval_info)} relevant chunks", expanded=False):
-            for chunk in retrieval_info:
-                st.markdown(f"**Chunk {chunk['chunk_id']}** (Score: {chunk['score']:.3f})")
-                st.text_area(
-                    f"Content {chunk['chunk_id']} ({len(chunk['text'])} characters):",
-                    value=chunk['text'],
-                    height=200,
-                    disabled=True,
-                    key=f"chunk_{chunk['chunk_id']}_{hash(prompt)}"
-                )
-                st.markdown("---")
-        context_text = "\n\n".join([chunk['text'] for chunk in retrieval_info])
-    elif all_nodes:
-        # Show chunks that were retrieved but didn't pass threshold
-        st.warning(f"⚠️ **Similarity threshold too high**: Using top {min(3, len(all_nodes))} chunks with lower scores")
-        with st.expander(f"📚 Using {min(3, len(all_nodes))} chunks (below threshold)", expanded=False):
-            for i, node in enumerate(all_nodes[:3]):
-                score = getattr(node, 'score', 0.0)
-                st.markdown(f"**Chunk {i + 1}** (Score: {score:.3f}) - Below threshold ({st.session_state.rag_config['similarity_threshold']})")
-                st.text_area(
-                    f"Content {i + 1} ({len(node.text)} characters):",
-                    value=node.text,
-                    height=200,
-                    disabled=True,
-                    key=f"fallback_chunk_{i}_{hash(prompt)}"
-                )
-                st.markdown("---")
-        context_text = "\n\n".join([node.text for node in all_nodes[:3]])
-    else:
-        st.error("❌ No chunks retrieved - this shouldn't happen with a processed document")
-        context_text = ""
-
+    # Display retrieved chunks information - create persistent container
+    chunks_container = st.container()
+    with chunks_container:
+        if retrieval_info:
+            # Show chunks that passed the similarity threshold
+            with st.expander(f"📚 Retrieved {len(retrieval_info)} relevant chunks", expanded=False):
+                for chunk in retrieval_info:
+                    st.markdown(f"**Chunk {chunk['chunk_id']}** (Score: {chunk['score']:.3f})")
+                    st.text_area(
+                        f"Content {chunk['chunk_id']} ({len(chunk['text'])} characters):",
+                        value=chunk['text'],
+                        height=200,
+                        disabled=True,
+                        key=f"chunk_{chunk['chunk_id']}_{hash(prompt)}"
+                    )
+                    st.markdown("---")
+            context_text = "\n\n".join([chunk['text'] for chunk in retrieval_info])
+        elif all_nodes:
+            # Show chunks that were retrieved but didn't pass threshold
+            st.warning(f"⚠️ **Similarity threshold too high**: Using top {min(3, len(all_nodes))} chunks with lower scores")
+            with st.expander(f"📚 Using {min(3, len(all_nodes))} chunks (below threshold)", expanded=False):
+                for i, node in enumerate(all_nodes[:3]):
+                    score = getattr(node, 'score', 0.0)
+                    st.markdown(f"**Chunk {i + 1}** (Score: {score:.3f}) - Below threshold ({st.session_state.rag_config['similarity_threshold']})")
+                    st.text_area(
+                        f"Content {i + 1} ({len(node.text)} characters):",
+                        value=node.text,
+                        height=200,
+                        disabled=True,
+                        key=f"fallback_chunk_{i}_{hash(prompt)}"
+                    )
+                    st.markdown("---")
+            context_text = "\n\n".join([node.text for node in all_nodes[:3]])
+        else:
+            st.error("❌ No chunks retrieved - this shouldn't happen with a processed document")
+            context_text = ""
+
+    # Create persistent containers for reasoning and answer
+    reasoning_container = st.container()
+    answer_container = st.container()
 
-
     # Create system prompt with retrieved context
     system_prompt = f"""You are a document analysis assistant. Answer questions ONLY using information from these relevant document excerpts:
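The fix leans on how Streamlit lays out containers: st.container() reserves a slot at the point in the script where it is created, and anything written into it later, even after other elements have been drawn, still renders in that slot. A minimal standalone illustration, not this app's code:

```python
import streamlit as st

top = st.container()   # slot reserved first, so it always renders first
st.markdown("Written second; renders second.")
top.markdown("Written last, but renders inside the reserved slot above.")
```

That ordering guarantee is what lets chunks_container, reasoning_container, and answer_container be created up front and filled during streaming without the chunk expander dropping below the answer.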
@@ -1017,9 +1042,9 @@ def generate_ai_response_rag(prompt, chat_data):
     in_reasoning = False
     reasoning_started = False
 
-    # Create containers for dynamic updates
-    reasoning_placeholder = st.empty()
-    answer_placeholder = st.empty()
+    # Create placeholders within the persistent containers
+    reasoning_placeholder = reasoning_container.empty()
+    answer_placeholder = answer_container.empty()
 
     try:
         if is_running_in_docker():
@@ -1058,20 +1083,20 @@ def generate_ai_response_rag(prompt, chat_data):
                     answer_content = full_response[think_end + 8:].strip()
                     in_reasoning = False
 
-                    # Show completed reasoning in expandable container
+                    # Show completed reasoning in persistent container
                     with reasoning_placeholder.container():
                         with st.expander("🤔 Reasoning", expanded=False):
                             st.markdown(reasoning_content)
 
-                    # Show the actual answer
+                    # Show the actual answer in persistent container
                     if answer_content:
                         answer_placeholder.markdown(answer_content)
                 else:
                     # Still in reasoning phase
                     in_reasoning = True
                     current_reasoning = full_response[think_start + 7:].strip()
 
-                    # Show reasoning with spinner or content
+                    # Show reasoning with spinner or content in persistent container
                     with reasoning_placeholder.container():
                         with st.expander("🤔 Reasoning", expanded=False):
                             if current_reasoning:
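For anyone puzzling over the magic offsets in this streaming parser, the +7 and +8 are simply the lengths of the reasoning tags the model is assumed to emit. A standalone sketch with named constants (the <think>...</think> convention is inferred from the surrounding code; the constant names are hypothetical):

```python
THINK_OPEN, THINK_CLOSE = "<think>", "</think>"  # lengths 7 and 8 → the +7 / +8 offsets

full_response = "<think>compare the excerpts</think>Final answer."
think_start = full_response.find(THINK_OPEN)
think_end = full_response.find(THINK_CLOSE)
reasoning = full_response[think_start + len(THINK_OPEN):think_end].strip()
answer = full_response[think_end + len(THINK_CLOSE):].strip()
assert (reasoning, answer) == ("compare the excerpts", "Final answer.")
```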
@@ -1080,7 +1105,7 @@ def generate_ai_response_rag(prompt, chat_data):
                                 with st.spinner("Thinking..."):
                                     st.empty()
             else:
-                # No reasoning tags detected, stream normally
+                # No reasoning tags detected, stream normally in persistent container
                 answer_content = full_response
                 answer_placeholder.markdown(answer_content)