
Commit a0bfbe5

context length display, extracted text view
1 parent 665c7a9 commit a0bfbe5

4 files changed: +476 −12 lines changed


app.py

Lines changed: 295 additions & 11 deletions
@@ -150,6 +150,192 @@ def get_available_models():
             error_msg += "- Direct execution: `ollama serve`"
             st.error(error_msg)
             return []
+
+    @staticmethod
+    def get_model_info(model_name):
+        """Get detailed model information including context length"""
+        try:
+            if in_docker:
+                # Docker - use explicit client configuration
+                client = ollama.Client(host=ollama_base_url)
+                model_info = client.show(model_name)
+            else:
+                # Direct execution - use default ollama client
+                model_info = ollama.show(model_name)
+
+            return model_info
+        except Exception as e:
+            logger.warning(f"Could not get model info for {model_name}: {e}")
+            return None
+
+    @staticmethod
+    def get_context_length(model_name):
+        """Get the context length for a specific model"""
+        model_info = ModelManager.get_model_info(model_name)
+
+        # Try to get context length from model parameters first
+        if model_info:
+            try:
+                # Check in parameters first
+                if 'parameters' in model_info:
+                    params = model_info['parameters']
+                    if 'num_ctx' in params:
+                        return int(params['num_ctx'])
+
+                # Check in model details/template
+                if 'details' in model_info:
+                    details = model_info['details']
+                    if 'parameter_size' in details:
+                        # Some models have context info in details
+                        pass
+
+            except Exception as e:
+                logger.warning(f"Error parsing model info for {model_name}: {e}")
+
+        # Fallback to default context lengths for common model families
+        try:
+            model_lower = model_name.lower()
+            if 'llama3.1' in model_lower or 'llama-3.1' in model_lower:
+                return 131072  # 128k context
+            elif 'llama3' in model_lower or 'llama-3' in model_lower:
+                return 8192  # 8k context
+            elif 'llama2' in model_lower or 'llama-2' in model_lower:
+                return 4096  # 4k context
+            elif 'mistral' in model_lower:
+                return 32768  # 32k context for most Mistral models
+            elif 'codellama' in model_lower:
+                return 16384  # 16k context
+            elif 'deepseek' in model_lower:
+                return 32768  # 32k context
+            elif 'qwen' in model_lower:
+                return 32768  # 32k context
+            else:
+                # Default fallback
+                return 2048
+
+        except Exception as e:
+            logger.warning(f"Error parsing context length for {model_name}: {e}")
+            return 2048  # Conservative default
+
+class ContextChecker:
+    """Utility class for checking context window compatibility"""
+
+    @staticmethod
+    def estimate_token_count(text):
+        """Estimate token count using multiple methods for better accuracy"""
+        if not text:
+            return 0
+
+        # Try to use tiktoken for more accurate counting (if available)
+        try:
+            import tiktoken
+            # Use cl100k_base encoding (used by GPT-4, similar to most modern LLMs)
+            encoding = tiktoken.get_encoding("cl100k_base")
+            return len(encoding.encode(text))
+        except ImportError:
+            pass
+
+        # Fallback: multiple estimation methods
+        char_count = len(text)
+        word_count = len(text.split())
+
+        # Different estimation approaches
+        char_based = char_count // 4        # ~4 chars per token (conservative)
+        word_based = int(word_count * 1.3)  # ~1.3 tokens per word (average)
+
+        # Use the higher estimate to be conservative
+        return max(char_based, word_based)
+
+    @staticmethod
+    def check_document_fits_context(document_text, model_name, system_prompt_length=2000):
+        """Check if document + system prompt fits in model's context window"""
+        if not document_text or not model_name:
+            return True, None, None
+
+        context_length = ModelManager.get_context_length(model_name)
+        if context_length is None:
+            return True, None, "Could not determine model context length"
+
+        doc_tokens = ContextChecker.estimate_token_count(document_text)
+        total_tokens = doc_tokens + system_prompt_length  # Reserve space for system prompt and user query
+
+        fits = total_tokens <= context_length
+        usage_percent = (total_tokens / context_length) * 100
+
+        return fits, {
+            'context_length': context_length,
+            'document_tokens': doc_tokens,
+            'total_estimated_tokens': total_tokens,
+            'usage_percent': usage_percent,
+            'available_tokens': context_length - total_tokens
+        }, None
+
+    @staticmethod
+    def display_context_warning(context_info, model_name):
+        """Display context window usage information and warnings"""
+        if not context_info:
+            return
+
+        usage_percent = context_info['usage_percent']
+
+        if usage_percent > 100:
+            st.error("⚠️ **Document Too Large for Context Window**")
+            st.error(f"""
+**Model:** {model_name}
+**Context Limit:** {context_info['context_length']:,} tokens
+**Document Size:** ~{context_info['document_tokens']:,} tokens
+**Usage:** {usage_percent:.1f}% (exceeds limit by {context_info['total_estimated_tokens'] - context_info['context_length']:,} tokens)
+
+**The document is too large for this model's context window.**
+""")
+
+            with st.expander("💡 Solutions for Large Documents", expanded=True):
+                st.markdown(f"""
+**Option 1: Use a model with larger context window**
+- Switch to a model like `llama3.1:8b` (128k context) or `mistral:latest` (32k context)
+
+**Option 2: Create a custom model with larger context**
+```bash
+# Create a Modelfile
+echo "FROM {model_name}
+PARAMETER num_ctx 32768" > Modelfile
+
+# Create custom model
+ollama create {model_name.split(':')[0]}-large -f Modelfile
+```
+
+**Option 3: Document chunking (future feature)**
+- Break document into smaller chunks
+- Process each chunk separately
+""")
+
+        elif usage_percent > 80:
+            st.warning("⚠️ **High Context Usage**")
+            st.warning(f"""
+**Model:** {model_name}
+**Context Limit:** {context_info['context_length']:,} tokens
+**Document Size:** ~{context_info['document_tokens']:,} tokens
+**Usage:** {usage_percent:.1f}% of context window
+**Available:** ~{context_info['available_tokens']:,} tokens for conversation
+
+**The document uses most of the context window. Long conversations may be truncated.**
+""")
+
+        elif usage_percent > 50:
+            st.info("ℹ️ **Moderate Context Usage**")
+            st.info(f"""
+**Model:** {model_name}
+**Context Usage:** {usage_percent:.1f}% ({context_info['document_tokens']:,} / {context_info['context_length']:,} tokens)
+**Available:** ~{context_info['available_tokens']:,} tokens for conversation
+""")
+
+        else:
+            st.success("✅ **Document fits comfortably in context window**")
+            st.success(f"""
+**Model:** {model_name}
+**Context Usage:** {usage_percent:.1f}% ({context_info['document_tokens']:,} / {context_info['context_length']:,} tokens)
+**Available:** ~{context_info['available_tokens']:,} tokens for conversation
+""")
 
 def render_sidebar(chat_manager):
     """Render the sidebar with chat history"""
@@ -230,6 +416,43 @@ def render_document_upload(chat_manager):
 
                 st.success(f"Document '{uploaded_file.name}' processed successfully!")
                 st.info(f"Extracted {len(extracted_text.split()):,} words")
+
+                # Check context window compatibility
+                if st.session_state.selected_model:
+                    fits, context_info, error = ContextChecker.check_document_fits_context(
+                        extracted_text, st.session_state.selected_model
+                    )
+
+                    if error:
+                        st.warning(f"Could not check context compatibility: {error}")
+                    elif context_info:
+                        usage_percent = context_info['usage_percent']
+
+                        # Always show progress bar and basic info after upload
+                        st.markdown("---")
+                        st.markdown("**📊 Context Check:**")
+
+                        # Show progress bar for context usage
+                        progress_value = min(usage_percent / 100, 1.0)  # Cap at 100% for display
+                        st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
+
+                        # Show status with appropriate color and clear messaging
+                        if usage_percent > 100:
+                            st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
+                            excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
+                            st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
+                        elif usage_percent > 80:
+                            st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                            st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+                        elif usage_percent > 50:
+                            st.info(f"ℹ️ **Moderate context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                            st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+                        else:
+                            st.success(f"✅ **Good fit** - Uses {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                            st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
+                else:
+                    st.info("💡 Select a model to check context window compatibility")
+
                 st.rerun()
             else:
                 st.error("❌ **Document Processing Failed**")
@@ -270,15 +493,46 @@ def render_chat_interface(chat_manager):
         with st.expander("📄 Current Document", expanded=False):
             st.write(f"**Document:** {chat['document_name']}")
 
-            # Show PDF viewer
-            if chat.get("document_content"):
-                pdf_viewer(
-                    input=chat["document_content"],
-                    width="100%",
-                    height=600,
-                    render_text=True,
-                    key=f"pdf_viewer_{st.session_state.current_chat_id}"
+            # Show context compatibility info with progress bar
+            if st.session_state.selected_model and chat.get("document_text"):
+                fits, context_info, error = ContextChecker.check_document_fits_context(
+                    chat["document_text"], st.session_state.selected_model
                 )
+
+                if context_info:
+                    usage_percent = context_info['usage_percent']
+
+                    # Show progress bar for context usage
+                    progress_value = min(usage_percent / 100, 1.0)  # Cap at 100% for display
+                    st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
+
+                    # Show brief summary
+                    st.caption(f"~{context_info['document_tokens']:,} tokens / {context_info['context_length']:,} limit")
+
+            # Show PDF and extracted text side by side
+            if chat.get("document_content") and chat.get("document_text"):
+                col1, col2 = st.columns([1, 1])
+
+                with col1:
+                    st.subheader("📄 PDF Document")
+                    pdf_viewer(
+                        input=chat["document_content"],
+                        width="100%",
+                        height=600,
+                        render_text=True,
+                        key=f"pdf_viewer_{st.session_state.current_chat_id}"
+                    )
+
+                with col2:
+                    st.subheader("📝 Extracted Text")
+                    # Show extracted text in a scrollable container
+                    st.text_area(
+                        "Document content:",
+                        value=chat["document_text"],
+                        height=600,
+                        disabled=True,
+                        label_visibility="collapsed"
+                    )
 
     # Display chat messages
     for message in chat.get("messages", []):
@@ -315,8 +569,6 @@ def render_chat_interface(chat_manager):
         st.warning("⚠️ **Chat Disabled**: No valid document content available. Please upload a PDF document with readable text to start chatting.")
         st.info("The document may have failed to process, or the extracted text may be empty. Try uploading a different PDF file.")
 
-
-
 def generate_ai_response(prompt, document_text):
     """Generate AI response using Ollama with reasoning support"""
 
@@ -516,13 +768,45 @@ def main():
     # Model selection
     available_models = ModelManager.get_available_models()
     if available_models:
+        previous_model = st.session_state.selected_model
         st.session_state.selected_model = st.selectbox(
             "Choose an Ollama model:",
            available_models,
            index=0 if not st.session_state.selected_model else
                  (available_models.index(st.session_state.selected_model)
-                  if st.session_state.selected_model in available_models else 0)
+                  if st.session_state.selected_model in available_models else 0),
+            key="model_selector"
         )
+
+        # Force rerun if model changed to ensure context check updates immediately
+        if previous_model != st.session_state.selected_model and previous_model is not None:
+            st.rerun()
+
+        # Only show context warnings in main area for serious issues (>80% usage)
+        current_chat = chat_manager.get_current_chat()
+        document_text = current_chat.get("document_text", "")
+
+        if st.session_state.selected_model and document_text and document_text.strip():
+            fits, context_info, error = ContextChecker.check_document_fits_context(
+                document_text, st.session_state.selected_model
+            )
+
+            if error:
+                st.markdown("---")
+                st.warning(f"⚠️ Could not check context compatibility: {error}")
+            elif context_info:
+                usage_percent = context_info['usage_percent']
+
+                # Only show warnings for serious issues (>80% usage)
+                if usage_percent > 100:
+                    st.markdown("---")
+                    st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
+                    excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
+                    st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
+                elif usage_percent > 80:
+                    st.markdown("---")
+                    st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
+                    st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
     else:
         st.error("No Ollama models found. Please ensure Ollama is running.")
         return
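The selectbox change handling in `main()` follows a common Streamlit pattern: capture the previous value before the widget renders, compare afterwards, and call `st.rerun()` so dependent UI (here the context check) refreshes straight away. In isolation, with placeholder names not taken from the commit:

```python
import streamlit as st

# Compare-then-rerun sketch (names are placeholders, not from app.py).
previous = st.session_state.get("choice")
st.session_state.choice = st.selectbox("Pick one:", ["a", "b", "c"], key="choice_selector")

if previous is not None and previous != st.session_state.choice:
    st.rerun()  # rerun so anything derived from the selection updates in the same interaction
```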

environment.yml

Lines changed: 1 addition & 0 deletions
@@ -17,4 +17,5 @@ dependencies:
   - streamlit-pdf-viewer
   - PyMuPDF
   - loguru
+  - tiktoken
   - -e .

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -7,4 +7,5 @@ PyMuPDF
 pytest
 pytest-cov
 pillow
-loguru
+loguru
+tiktoken
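tiktoken is added for the accurate counting path in `estimate_token_count`; a quick way to sanity-check the install (the sample sentence is arbitrary):

```python
import tiktoken

# Same encoding the app's estimator prefers when tiktoken is importable.
encoding = tiktoken.get_encoding("cl100k_base")
sample = "Context windows are measured in tokens, not characters."
print(len(encoding.encode(sample)))  # typically around 10 tokens for a sentence like this
```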
