@@ -150,6 +150,192 @@ def get_available_models():
            error_msg += "- Direct execution: `ollama serve`"
            st.error(error_msg)
            return []
+
+    @staticmethod
+    def get_model_info(model_name):
+        """Get detailed model information including context length"""
+        try:
+            if in_docker:
+                # Docker - use explicit client configuration
+                client = ollama.Client(host=ollama_base_url)
+                model_info = client.show(model_name)
+            else:
+                # Direct execution - use default ollama client
+                model_info = ollama.show(model_name)
+
+            return model_info
+        except Exception as e:
+            logger.warning(f"Could not get model info for {model_name}: {e}")
+            return None
+
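+    # (Assumed response shape: `show()` returns a dict-like object whose
+    # 'parameters' and 'details' entries are inspected by get_context_length()
+    # below; the exact keys vary across ollama client versions.)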
+    @staticmethod
+    def get_context_length(model_name):
+        """Get the context length for a specific model"""
+        model_info = ModelManager.get_model_info(model_name)
+
+        # Try to get the context length from the model parameters first
+        if model_info:
+            try:
+                if 'parameters' in model_info:
+                    params = model_info['parameters']
+                    # Depending on the ollama client version, 'parameters' is
+                    # either a dict or a newline-delimited "key value" string.
+                    if isinstance(params, str):
+                        for line in params.splitlines():
+                            parts = line.split()
+                            if len(parts) >= 2 and parts[0] == 'num_ctx':
+                                return int(parts[1])
+                    elif 'num_ctx' in params:
+                        return int(params['num_ctx'])
+
+                # 'details' (e.g. parameter_size) does not reliably expose the
+                # context length, so fall through to the family defaults below.
+            except Exception as e:
+                logger.warning(f"Error parsing model info for {model_name}: {e}")
+
+        # Fallback to default context lengths for common model families
+        model_lower = model_name.lower()
+        if 'llama3.1' in model_lower or 'llama-3.1' in model_lower:
+            return 131072  # 128k context
+        elif 'llama3' in model_lower or 'llama-3' in model_lower:
+            return 8192  # 8k context
+        elif 'llama2' in model_lower or 'llama-2' in model_lower:
+            return 4096  # 4k context
+        elif 'mistral' in model_lower:
+            return 32768  # 32k context for most Mistral models
+        elif 'codellama' in model_lower:
+            return 16384  # 16k context
+        elif 'deepseek' in model_lower:
+            return 32768  # 32k context
+        elif 'qwen' in model_lower:
+            return 32768  # 32k context
+        else:
+            return 2048  # Conservative default
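+
+    # NOTE: the family defaults above are assumptions based on commonly
+    # published context sizes; the effective window for a specific tag or
+    # quantization may differ, and a custom Modelfile can override num_ctx.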
+
+class ContextChecker:
+    """Utility class for checking context window compatibility"""
+
+    @staticmethod
+    def estimate_token_count(text):
+        """Estimate token count using multiple methods for better accuracy"""
+        if not text:
+            return 0
+
+        # Try to use tiktoken for more accurate counting (if available)
+        try:
+            import tiktoken
+            # Use cl100k_base encoding (used by GPT-4, similar to most modern LLMs)
+            encoding = tiktoken.get_encoding("cl100k_base")
+            return len(encoding.encode(text))
+        except ImportError:
+            pass
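+        # (cl100k_base only approximates the tokenizers that Llama- and
+        # Mistral-family models actually use, so counts can drift by a few
+        # percent in either direction.)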
+
+        # Fallback: multiple estimation methods
+        char_count = len(text)
+        word_count = len(text.split())
+
+        # Different estimation approaches
+        char_based = char_count // 4  # ~4 chars per token (conservative)
+        word_based = int(word_count * 1.3)  # ~1.3 tokens per word (average)
+
+        # Use the higher estimate to be conservative
+        return max(char_based, word_based)
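+        # Worked example (hypothetical input): a 6,000-character, 1,000-word
+        # document gives char_based = 1500 and word_based = 1300, so the
+        # conservative estimate returned is 1500 tokens.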
+
+    @staticmethod
+    def check_document_fits_context(document_text, model_name, system_prompt_length=2000):
+        """Check if document + system prompt fits in the model's context window"""
+        if not document_text or not model_name:
+            return True, None, None
+
+        context_length = ModelManager.get_context_length(model_name)
+        if context_length is None:
+            return True, None, "Could not determine model context length"
+
+        doc_tokens = ContextChecker.estimate_token_count(document_text)
+        total_tokens = doc_tokens + system_prompt_length  # Reserve space for system prompt and user query
+
+        fits = total_tokens <= context_length
+        usage_percent = (total_tokens / context_length) * 100
+
+        return fits, {
+            'context_length': context_length,
+            'document_tokens': doc_tokens,
+            'total_estimated_tokens': total_tokens,
+            'usage_percent': usage_percent,
+            'available_tokens': context_length - total_tokens
+        }, None
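+        # Typical call site (mirrors the UI code later in this diff):
+        #   fits, info, err = ContextChecker.check_document_fits_context(
+        #       text, st.session_state.selected_model)
+        #   if info:
+        #       st.progress(min(info['usage_percent'] / 100, 1.0))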
+
+    @staticmethod
+    def display_context_warning(context_info, model_name):
+        """Display context window usage information and warnings"""
+        if not context_info:
+            return
+
+        usage_percent = context_info['usage_percent']
+
+        if usage_percent > 100:
+            st.error("⚠️ **Document Too Large for Context Window**")
+            st.error(f"""
+            **Model:** {model_name}
+            **Context Limit:** {context_info['context_length']:,} tokens
+            **Document Size:** ~{context_info['document_tokens']:,} tokens
+            **Usage:** {usage_percent:.1f}% (exceeds limit by {context_info['total_estimated_tokens'] - context_info['context_length']:,} tokens)
+
+            **The document is too large for this model's context window.**
+            """)
+
+            with st.expander("💡 Solutions for Large Documents", expanded=True):
+                st.markdown(f"""
+                **Option 1: Use a model with a larger context window**
+                - Switch to a model like `llama3.1:8b` (128k context) or `mistral:latest` (32k context)
+
+                **Option 2: Create a custom model with a larger context**
+                ```bash
+                # Create a Modelfile
+                echo "FROM {model_name}
+                PARAMETER num_ctx 32768" > Modelfile
+
+                # Create the custom model
+                ollama create {model_name.split(':')[0]}-large -f Modelfile
+                ```
+
+                **Option 3: Document chunking (future feature)**
+                - Break the document into smaller chunks
+                - Process each chunk separately
+                """)
+
+        elif usage_percent > 80:
+            st.warning("⚠️ **High Context Usage**")
+            st.warning(f"""
+            **Model:** {model_name}
+            **Context Limit:** {context_info['context_length']:,} tokens
+            **Document Size:** ~{context_info['document_tokens']:,} tokens
+            **Usage:** {usage_percent:.1f}% of context window
+            **Available:** ~{context_info['available_tokens']:,} tokens for conversation
+
+            **The document uses most of the context window. Long conversations may be truncated.**
+            """)
+
+        elif usage_percent > 50:
+            st.info("ℹ️ **Moderate Context Usage**")
+            st.info(f"""
+            **Model:** {model_name}
+            **Context Usage:** {usage_percent:.1f}% ({context_info['document_tokens']:,} / {context_info['context_length']:,} tokens)
+            **Available:** ~{context_info['available_tokens']:,} tokens for conversation
+            """)
+
+        else:
+            st.success("✅ **Document fits comfortably in context window**")
+            st.success(f"""
+            **Model:** {model_name}
+            **Context Usage:** {usage_percent:.1f}% ({context_info['document_tokens']:,} / {context_info['context_length']:,} tokens)
+            **Available:** ~{context_info['available_tokens']:,} tokens for conversation
+            """)

def render_sidebar(chat_manager):
    """Render the sidebar with chat history"""
@@ -230,6 +416,43 @@ def render_document_upload(chat_manager):
        st.success(f"Document '{uploaded_file.name}' processed successfully!")
        st.info(f"Extracted {len(extracted_text.split()):,} words")
+
+        # Check context window compatibility
+        if st.session_state.selected_model:
+            fits, context_info, error = ContextChecker.check_document_fits_context(
+                extracted_text, st.session_state.selected_model
+            )
+
+            if error:
+                st.warning(f"Could not check context compatibility: {error}")
+            elif context_info:
+                usage_percent = context_info['usage_percent']
+
+                # Always show progress bar and basic info after upload
+                st.markdown("---")
+                st.markdown("**📊 Context Check:**")
+
+                # Show progress bar for context usage
+                progress_value = min(usage_percent / 100, 1.0)  # Cap at 100% for display
+                st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
+
+                # Show status with appropriate color and clear messaging
+                if usage_percent > 100:
+                    st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
+                    excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
+                    st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
+                elif usage_percent > 80:
+                    st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                    st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+                elif usage_percent > 50:
+                    st.info(f"ℹ️ **Moderate context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                    st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+                else:
+                    st.success(f"✅ **Good fit** - Uses {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                    st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+        else:
+            st.info("💡 Select a model to check context window compatibility")
+
        st.rerun()
    else:
        st.error("❌ **Document Processing Failed**")
@@ -270,15 +493,46 @@ def render_chat_interface(chat_manager):
    with st.expander("📄 Current Document", expanded=False):
        st.write(f"**Document:** {chat['document_name']}")

-        # Show PDF viewer
-        if chat.get("document_content"):
-            pdf_viewer(
-                input=chat["document_content"],
-                width="100%",
-                height=600,
-                render_text=True,
-                key=f"pdf_viewer_{st.session_state.current_chat_id}"
+        # Show context compatibility info with progress bar
+        if st.session_state.selected_model and chat.get("document_text"):
+            fits, context_info, error = ContextChecker.check_document_fits_context(
+                chat["document_text"], st.session_state.selected_model
            )
+
+            if context_info:
+                usage_percent = context_info['usage_percent']
+
+                # Show progress bar for context usage
+                progress_value = min(usage_percent / 100, 1.0)  # Cap at 100% for display
+                st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
+
+                # Show brief summary
+                st.caption(f"~{context_info['document_tokens']:,} tokens / {context_info['context_length']:,} limit")
+
+        # Show PDF and extracted text side by side
+        if chat.get("document_content") and chat.get("document_text"):
+            col1, col2 = st.columns([1, 1])
+
+            with col1:
+                st.subheader("📄 PDF Document")
+                pdf_viewer(
+                    input=chat["document_content"],
+                    width="100%",
+                    height=600,
+                    render_text=True,
+                    key=f"pdf_viewer_{st.session_state.current_chat_id}"
+                )
+
+            with col2:
+                st.subheader("📝 Extracted Text")
+                # Show extracted text in a scrollable container
+                st.text_area(
+                    "Document content:",
+                    value=chat["document_text"],
+                    height=600,
+                    disabled=True,
+                    label_visibility="collapsed"
+                )

    # Display chat messages
    for message in chat.get("messages", []):
@@ -315,8 +569,6 @@ def render_chat_interface(chat_manager):
        st.warning("⚠️ **Chat Disabled**: No valid document content available. Please upload a PDF document with readable text to start chatting.")
        st.info("The document may have failed to process, or the extracted text may be empty. Try uploading a different PDF file.")

-
-

def generate_ai_response(prompt, document_text):
    """Generate AI response using Ollama with reasoning support"""

@@ -516,13 +768,45 @@ def main():
    # Model selection
    available_models = ModelManager.get_available_models()
    if available_models:
+        previous_model = st.session_state.selected_model
        st.session_state.selected_model = st.selectbox(
            "Choose an Ollama model:",
            available_models,
            index=0 if not st.session_state.selected_model else
                  (available_models.index(st.session_state.selected_model)
-                  if st.session_state.selected_model in available_models else 0)
+                  if st.session_state.selected_model in available_models else 0),
+            key="model_selector"
        )
+
+        # Force rerun if model changed to ensure context check updates immediately
+        if previous_model != st.session_state.selected_model and previous_model is not None:
+            st.rerun()
+
+        # Only show context warnings in the main area for serious issues (>80% usage)
+        current_chat = chat_manager.get_current_chat()
+        document_text = current_chat.get("document_text", "")
+
+        if st.session_state.selected_model and document_text and document_text.strip():
+            fits, context_info, error = ContextChecker.check_document_fits_context(
+                document_text, st.session_state.selected_model
+            )
+
+            if error:
+                st.markdown("---")
+                st.warning(f"⚠️ Could not check context compatibility: {error}")
+            elif context_info:
+                usage_percent = context_info['usage_percent']
+
+                # Only show warnings for serious issues (>80% usage)
+                if usage_percent > 100:
+                    st.markdown("---")
+                    st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
+                    excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
+                    st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
+                elif usage_percent > 80:
+                    st.markdown("---")
+                    st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                    st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
    else:
        st.error("No Ollama models found. Please ensure Ollama is running.")
        return