Skip to content

Commit 2feacd7

Browse files
committed
Refactor document upload and context checking in app.py
- Removed the clear upload button for problematic files to streamline the upload process.
- Enhanced context checking feedback by improving warning messages for high context usage.
- Updated the RAG system to use a new LLM model and added logic to delete existing collections before creating new ones.
- Introduced new tests for AI response generation and context checking to ensure functionality and robustness.
1 parent 7087102 commit 2feacd7

File tree

6 files changed

+460
-690
lines changed

6 files changed

+460
-690
lines changed

app.py

Lines changed: 22 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -592,14 +592,6 @@ def render_document_upload(chat_manager):
592592
st.header("Upload Document")
593593
st.info("Upload a PDF document to start chatting")
594594

595-
# Clear upload button for problematic files
596-
col1, col2 = st.columns([3, 1])
597-
with col2:
598-
if st.button("🗑️ Clear Upload", help="Clear file upload state if stuck"):
599-
# Force clear the uploader by creating a new chat
600-
chat_manager.create_new_chat(clear_rag=True) # Clear RAG when clearing upload issues
601-
st.rerun()
602-
603595
# Use a unique key per chat to avoid file state conflicts
604596
uploader_key = f"uploader_{st.session_state.current_chat_id}"
605597
uploaded_file = st.file_uploader(
@@ -693,39 +685,22 @@ def render_document_upload(chat_manager):
693685
)
694686

695687
if error:
696-
st.warning(f"Could not check context compatibility: {error}")
688+
st.markdown("---")
689+
st.info(f"ℹ️ {error}")
690+
st.caption("Context checking requires model configuration that includes context window size.")
697691
elif context_info:
698692
usage_percent = context_info['usage_percent']
699693

700-
# Always show progress bar and basic info after upload
701-
st.markdown("---")
702-
st.markdown("**📊 Context Check:**")
703-
704-
# Show progress bar for context usage
705-
progress_value = min(usage_percent / 100, 1.0) # Cap at 100% for display
706-
st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
707-
708-
# Show status with appropriate color and clear messaging
694+
# Only show warnings for serious issues (>80% usage)
709695
if usage_percent > 100:
696+
st.markdown("---")
710697
st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
711698
excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
712699
st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
713700
elif usage_percent > 80:
701+
st.markdown("---")
714702
st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
715703
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
716-
elif usage_percent > 50:
717-
st.info(f"ℹ️ **Moderate context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
718-
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
719-
else:
720-
st.success(f"✅ **Good fit** - Uses {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
721-
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
722-
723-
# Show breakdown in expander
724-
with st.expander("📊 Token Breakdown", expanded=False):
725-
st.write(f"**System prompt:** ~{context_info['system_tokens']:,} tokens")
726-
st.write(f"**Response reserve:** ~{context_info['response_reserve']:,} tokens")
727-
st.write(f"**Total estimated:** ~{context_info['total_estimated_tokens']:,} tokens")
728-
st.write(f"**Context limit:** {context_info['context_length']:,} tokens")
729704
else:
730705
st.info("💡 Select a model to check context window compatibility")
731706

@@ -791,19 +766,23 @@ def render_chat_interface(chat_manager):
791766
chat["document_text"], st.session_state.selected_model
792767
)
793768

794-
if context_info:
769+
if error:
770+
st.markdown("---")
771+
st.info(f"ℹ️ {error}")
772+
st.caption("Context checking requires model configuration that includes context window size.")
773+
elif context_info:
795774
usage_percent = context_info['usage_percent']
796775

797-
# Show progress bar for context usage
798-
progress_value = min(usage_percent / 100, 1.0) # Cap at 100% for display
799-
st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
800-
801-
# Show brief summary
802-
st.caption(f"~{context_info['system_tokens']:,} tokens / {context_info['context_length']:,} limit")
803-
804-
# Recommend RAG for large documents
805-
if usage_percent > 80:
806-
st.info("💡 Consider enabling RAG for better handling of this large document")
776+
# Only show warnings for serious issues (>80% usage)
777+
if usage_percent > 100:
778+
st.markdown("---")
779+
st.error(f"⚠️ **Document too large** - Uses {usage_percent:.0f}% of context window")
780+
excess_tokens = context_info['total_estimated_tokens'] - context_info['context_length']
781+
st.caption(f"Document exceeds limit by ~{excess_tokens:,} tokens")
782+
elif usage_percent > 80:
783+
st.markdown("---")
784+
st.warning(f"⚠️ **High context usage** - {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
785+
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
807786

808787
# Show PDF and extracted text side by side
809788
if chat.get("document_content") and chat.get("document_text"):
@@ -1278,11 +1257,7 @@ def main():
12781257
document_text, st.session_state.selected_model
12791258
)
12801259

1281-
if error:
1282-
st.markdown("---")
1283-
st.info(f"ℹ️ {error}")
1284-
st.caption("Context checking requires model configuration that includes context window size.")
1285-
elif context_info:
1260+
if context_info:
12861261
usage_percent = context_info['usage_percent']
12871262

12881263
# Only show warnings for serious issues (>80% usage)

ragnarok/rag_system.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def __init__(
4747
self,
4848
ollama_base_url: str = "http://localhost:11434",
4949
embedding_model: str = "nomic-embed-text",
50-
llm_model: str = "llama3.1:8b",
50+
llm_model: str = "olmo2:13b",
5151
chunk_size: int = 128,
5252
chunk_overlap: int = 25,
5353
similarity_threshold: float = 0.7,
@@ -190,6 +190,19 @@ def process_document(self, document_text: str, document_id: str) -> Dict[str, An
190190
# Create collection for this document
191191
collection_name = f"doc_{document_id}"
192192

193+
# Check if collection already exists and delete it
194+
try:
195+
existing_collections = self.chroma_client.list_collections()
196+
for existing_collection in existing_collections:
197+
collection_obj = existing_collection if hasattr(existing_collection, 'name') else existing_collection
198+
existing_name = collection_obj.name if hasattr(collection_obj, 'name') else str(collection_obj)
199+
if existing_name == collection_name:
200+
self.chroma_client.delete_collection(existing_name)
201+
logger.info(f"Deleted existing collection: {existing_name}")
202+
break
203+
except Exception as e:
204+
logger.warning(f"Could not check/delete existing collection {collection_name}: {e}")
205+
193206
collection = self.chroma_client.create_collection(
194207
name=collection_name,
195208
metadata={

tests/test_ai_response.py

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
"""
2+
Simple tests for AI response generation - focuses on core logic
3+
"""
4+
import pytest
5+
import sys
6+
import os
7+
8+
# Add parent directory to path
9+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10+
11+
import app
12+
from app import create_system_prompt, ModelManager
13+
14+
# Known available models
15+
EMBEDDING_MODEL = "nomic-embed-text:latest"
16+
LLM_MODEL = "olmo2:13b"
17+
18+
19+
def is_ollama_available():
20+
"""Check if Ollama is available"""
21+
try:
22+
import ollama
23+
ollama.list()
24+
return True
25+
except:
26+
return False
27+
28+
29+
@pytest.mark.skipif(not is_ollama_available(), reason="Ollama not available")
30+
class TestModelManager:
31+
"""Test ModelManager with real Ollama connection"""
32+
33+
def test_get_available_models_real(self):
34+
"""Test getting real model list from Ollama"""
35+
models = ModelManager.get_available_models()
36+
37+
assert isinstance(models, list)
38+
assert len(models) > 0 # Should have at least one model
39+
40+
# Should contain our known models
41+
assert EMBEDDING_MODEL in models
42+
assert LLM_MODEL in models
43+
44+
def test_get_model_info_real(self):
45+
"""Test getting model info for known model"""
46+
info = ModelManager.get_model_info(LLM_MODEL)
47+
48+
# The function returns various types, just check it doesn't crash
49+
assert info is not None
50+
51+
def test_get_context_length_real(self):
52+
"""Test getting context length for known model"""
53+
context_length = ModelManager.get_context_length(LLM_MODEL)
54+
55+
# May return None for unknown models, just check it doesn't crash
56+
assert context_length is None or isinstance(context_length, int)
57+
58+
59+
class TestContextChecker:
60+
"""Test context checking functionality"""
61+
62+
def test_estimate_token_count(self):
63+
"""Test token count estimation"""
64+
test_text = "This is a test sentence with several words."
65+
66+
token_count = app.ContextChecker.estimate_token_count(test_text)
67+
68+
assert isinstance(token_count, int)
69+
assert token_count > 0
70+
assert token_count < len(test_text) # Should be less than character count
71+
72+
def test_check_document_fits_context_basic(self):
73+
"""Test document context checking basic functionality"""
74+
short_text = "Short text."
75+
76+
result = app.ContextChecker.check_document_fits_context(
77+
short_text, LLM_MODEL, "Test prompt"
78+
)
79+
80+
# Function may return tuple or dict depending on model support
81+
assert result is not None
82+
# Just verify it doesn't crash and returns something
83+
assert len(result) > 0
84+
85+
86+
class TestEnvironmentDetection:
87+
"""Test environment detection functions"""
88+
89+
def test_docker_detection(self):
90+
"""Test Docker environment detection"""
91+
result = app.is_running_in_docker()
92+
93+
assert isinstance(result, bool)
94+
95+
def test_ollama_url_configuration(self):
96+
"""Test Ollama URL is properly configured"""
97+
assert app.ollama_base_url.startswith('http')
98+
assert ':11434' in app.ollama_base_url

0 commit comments

Comments (0)