"""
Chat Interface UI Components

Handles the main chat interface including message display, response generation,
and citation display.
"""

import streamlit as st
import ollama
from loguru import logger
from streamlit_pdf_viewer import pdf_viewer

from .enhanced_pdf_processor import EnhancedPDFProcessor
from .llm_interface import PromptBuilder
from .ui_config import is_running_in_docker, get_ollama_base_url
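
# Session-state keys this module reads/writes (all referenced in the functions
# below); they are assumed to be initialised elsewhere in the app before these
# UI helpers run:
#   chat_manager, rag_system, rag_config, selected_model, generating, stop_generation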


def show_citations(response, chat, user_question=""):
    """Show citation-based references"""
    if not chat or not chat.document_content:
        return

    try:
        pdf_processor = EnhancedPDFProcessor(chat.document_content)
        pdf_processor.display_citation_based_references(
            response, chat.document_text, user_question
        )
    except Exception as e:
        st.warning(f"Could not show citations: {e}")


def generate_response_with_ui(prompt, current_chat):
    """Generate an AI response with UI components (streaming, reasoning, stop button) using RAG"""
    # Set generation state
    st.session_state.generating = True
    st.session_state.stop_generation = False

    try:
        # Check RAG system requirements and load the appropriate document
        if not (st.session_state.rag_system and
                current_chat.rag_processed and
                current_chat.document_id):
            st.error("❌ **RAG system not ready**")
            st.error("Please ensure the document is processed with the RAG system.")
            return "Error: RAG system not ready for query processing."

        # Ensure the correct document is loaded for this chat
        if st.session_state.rag_system.current_document_id != current_chat.document_id:
            try:
                success = st.session_state.rag_system.load_document(current_chat.document_id)
                if not success:
                    st.error("❌ **Could not load document** for this chat")
                    st.error("The document may have been deleted from the RAG system.")
                    return "Error: Could not load document for this chat."
            except Exception as e:
                st.error(f"❌ **Error loading document**: {str(e)}")
                return f"Error: Could not load document - {str(e)}"

        logger.info("Using RAG system for response generation")

        # Get retrieved chunks for RAG
        try:
            retrieval_info = st.session_state.rag_system.get_retrieval_info(prompt)
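            # For reference (shape assumed from how the entries are used below):
            # each item in retrieval_info is expected to be a dict providing at
            # least 'chunk_id', 'text', and 'score' for a chunk that passed the
            # similarity threshold, e.g. (values illustrative):
            #   {"chunk_id": 1, "score": 0.78, "text": "...chunk contents..."}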

            # Get all retrieved chunks (before filtering) for fallback
            try:
                from llama_index.core.retrievers import VectorIndexRetriever
                retriever = VectorIndexRetriever(
                    index=st.session_state.rag_system.index,
                    similarity_top_k=st.session_state.rag_system.top_k
                )
                all_nodes = retriever.retrieve(prompt)
            except Exception as e:
                logger.warning(f"Could not get unfiltered chunks: {e}")
                all_nodes = []

            # Display retrieved chunk information IMMEDIATELY (before generation)
            if retrieval_info:
                # Show chunks that passed the similarity threshold
                with st.expander(f"📚 Retrieved {len(retrieval_info)} relevant chunks", expanded=False):
                    for chunk in retrieval_info:
                        st.markdown(f"**Chunk {chunk['chunk_id']}** (Score: {chunk['score']:.3f})")
                        st.text_area(
                            f"Content {chunk['chunk_id']} ({len(chunk['text'])} characters):",
                            value=chunk['text'],
                            height=200,
                            disabled=True,
                            key=f"chunk_{chunk['chunk_id']}_{hash(prompt)}"
                        )
                        st.markdown("---")
                context_text = "\n\n".join([chunk['text'] for chunk in retrieval_info])
            elif all_nodes:
                # Show chunks that were retrieved but didn't pass the threshold
                st.warning(f"⚠️ **Similarity threshold too high**: Using top {min(3, len(all_nodes))} chunks with lower scores")
                with st.expander(f"📚 Using {min(3, len(all_nodes))} chunks (below threshold)", expanded=False):
                    for i, node in enumerate(all_nodes[:3]):
                        score = getattr(node, 'score', 0.0)
                        st.markdown(f"**Chunk {i + 1}** (Score: {score:.3f}) - Below threshold ({st.session_state.rag_config['similarity_threshold']})")
                        st.text_area(
                            f"Content {i + 1} ({len(node.text)} characters):",
                            value=node.text,
                            height=200,
                            disabled=True,
                            key=f"fallback_chunk_{i}_{hash(prompt)}"
                        )
                        st.markdown("---")
                context_text = "\n\n".join([node.text for node in all_nodes[:3]])
            else:
                st.error("❌ No chunks retrieved for this query")
                st.error("Try adjusting your question or lowering the similarity threshold in RAG Settings.")
                return "Error: No relevant content found for this query."

        except Exception as rag_error:
            logger.error(f"RAG retrieval failed: {rag_error}")
            st.error(f"❌ **RAG retrieval failed**: {str(rag_error)}")
            return f"Error: RAG retrieval failed - {str(rag_error)}"

        # Create containers for dynamic updates (after chunks are displayed)
        reasoning_placeholder = st.empty()
        answer_placeholder = st.empty()
        stop_container = st.container()

        # Show stop button
        with stop_container:
            stop_button_placeholder = st.empty()
            with stop_button_placeholder.container():
                col1, col2 = st.columns([10, 1])
                with col2:
                    if st.button("⏹", key=f"stop_gen_{hash(prompt)}", help="Stop generation"):
                        st.session_state.stop_generation = True

        # Generate response with streaming
        full_response = ""
        reasoning_content = ""
        answer_content = ""
        reasoning_started = False
        in_reasoning = False
        generation_stopped = False

        # Use direct ollama streaming with RAG context
        system_prompt = PromptBuilder.create_system_prompt(context_text, is_rag=True)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Determine Ollama client based on environment
        ollama_base_url = get_ollama_base_url()
        in_docker = is_running_in_docker()

        if in_docker:
            client = ollama.Client(host=ollama_base_url)
            chat_stream = client.chat(
                model=st.session_state.selected_model,
                messages=messages,
                stream=True
            )
        else:
            chat_stream = ollama.chat(
                model=st.session_state.selected_model,
                messages=messages,
                stream=True
            )

        # Handle streaming response with reasoning support
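        # Reasoning-capable models are assumed to wrap their chain of thought in
        # <think>...</think> tags, so a full stream looks roughly like:
        #   "<think> ...model reasoning... </think> final answer text"
        # The offsets 7 and 8 used below are len('<think>') and len('</think>').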
        chunk_count = 0
        for chunk in chat_stream:
            chunk_count += 1

            # Check if user wants to stop on every chunk for responsiveness
            if st.session_state.get('stop_generation', False):
                generation_stopped = True
                logger.info(f"Generation stopped at chunk {chunk_count}")
                break

            if chunk['message']['content']:
                chunk_content = chunk['message']['content']
                full_response += chunk_content

                # Check for reasoning tags
                think_start = full_response.find('<think>')
                think_end = full_response.find('</think>')

                if think_start != -1:
                    reasoning_started = True

                    if think_end != -1:
                        # Reasoning is complete, extract both parts
                        reasoning_content = full_response[think_start + 7:think_end].strip()
                        answer_content = full_response[think_end + 8:].strip()
                        in_reasoning = False

                        # Show completed reasoning
                        with reasoning_placeholder.container():
                            with st.expander("🤔 Reasoning", expanded=False):
                                st.markdown(reasoning_content)

                        # Show the actual answer
                        if answer_content:
                            answer_placeholder.markdown(answer_content)
                    else:
                        # Still in reasoning phase
                        in_reasoning = True
                        current_reasoning = full_response[think_start + 7:].strip()

                        # Show reasoning with spinner or content
                        with reasoning_placeholder.container():
                            with st.expander("🤔 Reasoning", expanded=False):
                                if current_reasoning:
                                    st.markdown(current_reasoning)
                                else:
                                    with st.spinner("Thinking..."):
                                        st.empty()
                else:
                    # No reasoning tags detected, stream normally
                    answer_content = full_response
                    answer_placeholder.markdown(answer_content)

        # Handle stopped generation
        if generation_stopped or st.session_state.get('stop_generation', False):
            final_answer = answer_content if reasoning_started else full_response
            if final_answer.strip():
                final_answer += "\n\n*[Generation stopped by user]*"
            else:
                final_answer = "*[Generation stopped by user before any content was generated]*"

            # Update display with stopped message
            if reasoning_started and answer_content:
                answer_placeholder.markdown(final_answer)
            elif not reasoning_started:
                answer_placeholder.markdown(final_answer)

            # Show stopped message and remove stop button
            with stop_container:
                stop_button_placeholder.empty()
                st.info("🛑 Generation stopped by user")
        else:
            # Clear stop button when generation completes normally
            with stop_container:
                stop_button_placeholder.empty()

        # Show method information
        st.info("🔍 Response generated using RAG (semantic search)")

        # Return the final answer (without reasoning tags) for storage
        final_answer = answer_content if reasoning_started else full_response

        # Add stopped indicator if generation was interrupted
        if generation_stopped or st.session_state.get('stop_generation', False):
            if final_answer.strip():
                final_answer += "\n\n*[Generation stopped by user]*"
            else:
                final_answer = "*[Generation stopped by user before any content was generated]*"

        return final_answer

    except Exception as e:
        st.error(f"Error generating response: {e}")
        return ""
    finally:
        # Reset generation state
        st.session_state.generating = False
        st.session_state.stop_generation = False


def render_chat_interface():
    """Render the main chat interface"""
    current_chat = st.session_state.chat_manager.get_current_chat()
    if not current_chat:
        return

    # Show current document info
    if current_chat.document_name:
        with st.expander("📄 Current Document", expanded=False):
            st.write(f"**Document:** {current_chat.document_name}")

            # Show RAG processing status
            if current_chat.rag_processed:
                rag_stats = current_chat.rag_stats or {}
                st.success("✅ Processed with RAG system")
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Chunks Created", rag_stats.get("total_chunks", 0))
                with col2:
                    st.metric("Chunk Size", f"{rag_stats.get('chunk_size', 0)} tokens")
            else:
                st.warning("⚠️ Document not processed with RAG system")

            # Show PDF and extracted text side by side
            if current_chat.document_content and current_chat.document_text:
                col1, col2 = st.columns([1, 1])

                with col1:
                    st.subheader("📄 PDF Document")
                    pdf_viewer(
                        input=current_chat.document_content,
                        width="100%",
                        height=600,
                        render_text=True,
                        key=f"pdf_viewer_{current_chat.id}"
                    )

                with col2:
                    st.subheader("📝 Extracted Text")
                    # Show extracted text in a scrollable container
                    st.text_area(
                        "Document content:",
                        value=current_chat.document_text,
                        height=600,
                        disabled=True,
                        label_visibility="collapsed"
                    )

    # Display chat messages
    for message in current_chat.messages:
        with st.chat_message(message.role):
            st.markdown(message.content)

    # Chat input - only show if document text is valid and not currently generating
    if current_chat.document_text and current_chat.document_text.strip():
        if st.session_state.generating:
            st.info("🤖 Generating response... Use the stop button above to interrupt.")
            st.chat_input("Generating response...", disabled=True)
        else:
            if prompt := st.chat_input("Ask about your document..."):
                if not st.session_state.selected_model:
                    st.warning("Please select a model first")
                    return

                # Add user message
                with st.chat_message("user"):
                    st.markdown(prompt)
                current_chat.add_message("user", prompt)

                # Generate AI response
                with st.chat_message("assistant"):
                    response = generate_response_with_ui(prompt, current_chat)
                    current_chat.add_message("assistant", response)
                    show_citations(response, current_chat, prompt)
    else:
        st.warning("⚠️ **Chat Disabled**: No valid document content available. Please upload a PDF document with readable text to start chatting.")
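
# Minimal usage sketch (an assumption; the actual wiring lives in the app's entry
# point, which is expected to initialise the session-state keys listed at the top
# of this module before calling):
#     render_chat_interface()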