Skip to content

Commit 2feacd7

Browse files
committed
Refactor document upload and context checking in app.py
- Removed the clear upload button for problematic files to streamline the upload process.
- Enhanced context checking feedback by improving warning messages for high context usage.
- Updated the RAG system to use a new LLM model and added logic to delete existing collections before creating new ones.
- Introduced new tests for AI response generation and context checking to ensure functionality and robustness.
1 parent 7087102 commit 2feacd7

File tree

6 files changed

+460
-690
lines changed

6 files changed

+460
-690
lines changed

app.py

Lines changed: 22 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -592,14 +592,6 @@ def render_document_upload(chat_manager):
592592
st.header("Upload Document")
593593
st.info("Upload a PDF document to start chatting")
594594

595-
# Clear upload button for problematic files
596-
col1, col2 = st.columns([3, 1])
597-
with col2:
598-
if st.button("🗑️ Clear Upload", help="Clear file upload state if stuck"):
599-
# Force clear the uploader by creating a new chat
600-
chat_manager.create_new_chat(clear_rag=True) # Clear RAG when clearing upload issues
601-
st.rerun()
602-
603595
# Use a unique key per chat to avoid file state conflicts
604596
uploader_key = f"uploader_{st.session_state.current_chat_id}"
605597
uploaded_file = st.file_uploader(
@@ -693,39 +685,22 @@ def render_document_upload(chat_manager):
693685
)
694686

695687
if error:
696-
st.warning(f"Could not check context compatibility: {error}")
688+
st.markdown("---")
689+
st.info(f"ℹ️ {error}")
690+
st.caption("Context checking requires model configuration that includes context window size.")
697691
elif context_info:
698692
usage_percent = context_info['usage_percent']
699693

700-
# Always show progress bar and basic info after upload
701-
st.markdown("---")
702-
st.markdown("**📊 Context Check:**")
703-
704-
# Show progress bar for context usage
705-
progress_value = min(usage_percent / 100, 1.0) # Cap at 100% for display
706-
st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
707-
708-
# Show status with appropriate color and clear messaging
694+
# Only show warnings for serious issues (>80% usage)
709695
if usage_percent > 100:
696+
st.markdown("---")
710697
st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
711698
excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
712699
st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
713700
elif usage_percent > 80:
701+
st.markdown("---")
714702
st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
715703
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
716-
elif usage_percent > 50:
717-
st.info(f"ℹ️ **Moderate context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
718-
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
719-
else:
720-
st.success(f"✅ **Good fit** - Uses {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
721-
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
722-
723-
# Show breakdown in expander
724-
with st.expander("📊 Token Breakdown", expanded=False):
725-
st.write(f"**System prompt:** ~{context_info['system_tokens']:,} tokens")
726-
st.write(f"**Response reserve:** ~{context_info['response_reserve']:,} tokens")
727-
st.write(f"**Total estimated:** ~{context_info['total_estimated_tokens']:,} tokens")
728-
st.write(f"**Context limit:** {context_info['context_length']:,} tokens")
729704
else:
730705
st.info("💡 Select a model to check context window compatibility")
731706

@@ -791,19 +766,23 @@ def render_chat_interface(chat_manager):
791766
chat["document_text"], st.session_state.selected_model
792767
)
793768

794-
if context_info:
769+
if error:
770+
st.markdown("---")
771+
st.info(f"ℹ️ {error}")
772+
st.caption("Context checking requires model configuration that includes context window size.")
773+
elif context_info:
795774
usage_percent = context_info['usage_percent']
796775

797-
# Show progress bar for context usage
798-
progress_value = min(usage_percent / 100, 1.0) # Cap at 100% for display
799-
st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
800-
801-
# Show brief summary
802-
st.caption(f"~{context_info['system_tokens']:,} tokens / {context_info['context_length']:,} limit")
803-
804-
# Recommend RAG for large documents
805-
if usage_percent > 80:
806-
st.info("💡 Consider enabling RAG for better handling of this large document")
776+
# Only show warnings for serious issues (>80% usage)
777+
if usage_percent > 100:
778+
st.markdown("---")
779+
st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
780+
excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
781+
st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
782+
elif usage_percent > 80:
783+
st.markdown("---")
784+
st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
785+
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
807786

808787
# Show PDF and extracted text side by side
809788
if chat.get("document_content") and chat.get("document_text"):
@@ -1278,11 +1257,7 @@ def main():
12781257
document_text, st.session_state.selected_model
12791258
)
12801259

1281-
if error:
1282-
st.markdown("---")
1283-
st.info(f"ℹ️ {error}")
1284-
st.caption("Context checking requires model configuration that includes context window size.")
1285-
elif context_info:
1260+
if context_info:
12861261
usage_percent = context_info['usage_percent']
12871262

12881263
# Only show warnings for serious issues (>80% usage)

ragnarok/rag_system.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def __init__(
4747
self,
4848
ollama_base_url: str = "http://localhost:11434",
4949
embedding_model: str = "nomic-embed-text",
50-
llm_model: str = "llama3.1:8b",
50+
llm_model: str = "olmo2:13b",
5151
chunk_size: int = 128,
5252
chunk_overlap: int = 25,
5353
similarity_threshold: float = 0.7,
@@ -190,6 +190,19 @@ def process_document(self, document_text: str, document_id: str) -> Dict[str, An
190190
# Create collection for this document
191191
collection_name = f"doc_{document_id}"
192192

193+
# Check if collection already exists and delete it
194+
try:
195+
existing_collections = self.chroma_client.list_collections()
196+
for existing_collection in existing_collections:
197+
collection_obj = existing_collection if hasattr(existing_collection, 'name') else existing_collection
198+
existing_name = collection_obj.name if hasattr(collection_obj, 'name') else str(collection_obj)
199+
if existing_name == collection_name:
200+
self.chroma_client.delete_collection(existing_name)
201+
logger.info(f"Deleted existing collection: {existing_name}")
202+
break
203+
except Exception as e:
204+
logger.warning(f"Could not check/delete existing collection {collection_name}: {e}")
205+
193206
collection = self.chroma_client.create_collection(
194207
name=collection_name,
195208
metadata={

tests/test_ai_response.py

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
"""
2+
Simple tests for AI response generation - focuses on core logic
3+
"""
4+
import pytest
5+
import sys
6+
import os
7+
8+
# Add parent directory to path
9+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10+
11+
import app
12+
from app import create_system_prompt, ModelManager
13+
14+
# Known available models
15+
EMBEDDING_MODEL = "nomic-embed-text:latest"
16+
LLM_MODEL = "olmo2:13b"
17+
18+
19+
def is_ollama_available():
20+
"""Check if Ollama is available"""
21+
try:
22+
import ollama
23+
ollama.list()
24+
return True
25+
except:
26+
return False
27+
28+
29+
@pytest.mark.skipif(not is_ollama_available(), reason="Ollama not available")
30+
class TestModelManager:
31+
"""Test ModelManager with real Ollama connection"""
32+
33+
def test_get_available_models_real(self):
34+
"""Test getting real model list from Ollama"""
35+
models = ModelManager.get_available_models()
36+
37+
assert isinstance(models, list)
38+
assert len(models) > 0 # Should have at least one model
39+
40+
# Should contain our known models
41+
assert EMBEDDING_MODEL in models
42+
assert LLM_MODEL in models
43+
44+
def test_get_model_info_real(self):
45+
"""Test getting model info for known model"""
46+
info = ModelManager.get_model_info(LLM_MODEL)
47+
48+
# The function returns various types, just check it doesn't crash
49+
assert info is not None
50+
51+
def test_get_context_length_real(self):
52+
"""Test getting context length for known model"""
53+
context_length = ModelManager.get_context_length(LLM_MODEL)
54+
55+
# May return None for unknown models, just check it doesn't crash
56+
assert context_length is None or isinstance(context_length, int)
57+
58+
59+
class TestContextChecker:
60+
"""Test context checking functionality"""
61+
62+
def test_estimate_token_count(self):
63+
"""Test token count estimation"""
64+
test_text = "This is a test sentence with several words."
65+
66+
token_count = app.ContextChecker.estimate_token_count(test_text)
67+
68+
assert isinstance(token_count, int)
69+
assert token_count > 0
70+
assert token_count < len(test_text) # Should be less than character count
71+
72+
def test_check_document_fits_context_basic(self):
73+
"""Test document context checking basic functionality"""
74+
short_text = "Short text."
75+
76+
result = app.ContextChecker.check_document_fits_context(
77+
short_text, LLM_MODEL, "Test prompt"
78+
)
79+
80+
# Function may return tuple or dict depending on model support
81+
assert result is not None
82+
# Just verify it doesn't crash and returns something
83+
assert len(result) > 0
84+
85+
86+
class TestEnvironmentDetection:
87+
"""Test environment detection functions"""
88+
89+
def test_docker_detection(self):
90+
"""Test Docker environment detection"""
91+
result = app.is_running_in_docker()
92+
93+
assert isinstance(result, bool)
94+
95+
def test_ollama_url_configuration(self):
96+
"""Test Ollama URL is properly configured"""
97+
assert app.ollama_base_url.startswith('http')
98+
assert ':11434' in app.ollama_base_url

0 commit comments

Comments (0)