Commit 43a3e7e

Add screenshots to README, update default RAG configuration, and enhance UI components for improved user experience.
1 parent: 52dcbdc · commit: 43a3e7e

File tree: 11 files changed, +96 -56 lines

README.md

Lines changed: 13 additions & 3 deletions

@@ -17,6 +17,16 @@
  - **Fast Processing** - Optimized document parsing and retrieval system
  - 🌐 **Easy Web Interface** - Simple Streamlit app, no technical knowledge required

+ <div align="center">
+   <img src="assets/screenshot_01.png" alt="Ragadoc Main Interface" width="80%">
+   <p><em>Main chat interface with document upload and conversation</em></p>
+ </div>
+
+ <div align="center">
+   <img src="assets/screenshot_02.png" alt="Ragadoc Document Analysis" width="80%">
+   <p><em>Document analysis with citations and highlighted responses</em></p>
+ </div>
+
  ## 🚀 Quick Start

  ### Model Selection Guide

@@ -158,9 +168,9 @@ The app automatically detects your installed Ollama models. Popular choices:
  ### Advanced Settings

  Configure in the sidebar:
- - **Chunk Size**: How much text to process at once (default: 512)
- - **Chunk Overlap**: Text overlap between chunks (default: 50)
- - **Top-K Results**: Number of relevant chunks to consider (default: 5)
+ - **Chunk Size**: How much text to process at once
+ - **Chunk Overlap**: Text overlap between chunks
+ - **Top-K Results**: Number of relevant chunks to consider

assets/screenshot_01.png (binary image, 500 KB)

assets/screenshot_02.png (binary image, 466 KB)

ragadoc/chat_manager.py

Lines changed: 3 additions & 8 deletions

@@ -11,6 +11,8 @@
  from dataclasses import dataclass, field
  from loguru import logger

+ from .config import DEFAULT_RAG_CONFIG
+

  @dataclass
  class ChatMessage:

@@ -55,14 +57,7 @@ def __init__(self):
      """Initialize chat manager"""
      self.chats: Dict[str, ChatSession] = {}
      self.current_chat_id: Optional[str] = None
-     self._default_rag_config = {
-         "chunk_size": 256,
-         "chunk_overlap": 25,
-         "similarity_threshold": 0.7,
-         "top_k": 10,
-         "embedding_model": "nomic-embed-text",
-         "llm_model": None
-     }
+     self._default_rag_config = DEFAULT_RAG_CONFIG.copy()

  def create_new_chat(self, clear_rag_callback: Optional[callable] = None) -> str:
      """

ragadoc/config.py

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+ """
+ Configuration constants for Ragadoc
+
+ This module contains all default configuration values used throughout the application.
+ """
+
+ # Default RAG Configuration
+ DEFAULT_RAG_CONFIG = {
+     "chunk_size": 128,
+     "chunk_overlap": 64,
+     "similarity_threshold": 0.7,
+     "top_k": 10,
+     "embedding_model": "nomic-embed-text",
+     "llm_model": None
+ }
+
+ # RAG System Constructor Defaults
+ DEFAULT_CHUNK_SIZE = DEFAULT_RAG_CONFIG["chunk_size"]
+ DEFAULT_CHUNK_OVERLAP = DEFAULT_RAG_CONFIG["chunk_overlap"]
+ DEFAULT_SIMILARITY_THRESHOLD = DEFAULT_RAG_CONFIG["similarity_threshold"]
+ DEFAULT_TOP_K = DEFAULT_RAG_CONFIG["top_k"]
+ DEFAULT_EMBEDDING_MODEL = DEFAULT_RAG_CONFIG["embedding_model"]
+
+ # UI Slider Configuration
+ CHUNK_SIZE_RANGE = (32, 1024)
+ CHUNK_SIZE_STEP = 64
+ CHUNK_OVERLAP_RANGE = (0, 200)
+ CHUNK_OVERLAP_STEP = 10
+ SIMILARITY_THRESHOLD_RANGE = (0.0, 1.0)
+ SIMILARITY_THRESHOLD_STEP = 0.05
+ TOP_K_RANGE = (1, 20)
+ TOP_K_STEP = 1
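
config.py is new in this commit and becomes the single source of truth for the RAG defaults and the sidebar slider bounds. A rough usage sketch restricted to the constants defined above (the "olmo2:13b" model name is taken from rag_system.py further down; nothing else is assumed):

from ragadoc.config import DEFAULT_RAG_CONFIG, DEFAULT_CHUNK_SIZE, CHUNK_SIZE_RANGE

config = DEFAULT_RAG_CONFIG.copy()             # never mutate the shared dict directly
config["llm_model"] = "olmo2:13b"              # model name as used in rag_system.py

low, high = CHUNK_SIZE_RANGE                   # slider bounds for the UI
print(DEFAULT_CHUNK_SIZE, low, high)           # 128 32 1024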

ragadoc/rag_system.py

Lines changed: 14 additions & 5 deletions

@@ -37,6 +37,15 @@
  import chromadb
  from chromadb.config import Settings as ChromaSettings

+ # Local imports
+ from .config import (
+     DEFAULT_CHUNK_SIZE,
+     DEFAULT_CHUNK_OVERLAP,
+     DEFAULT_SIMILARITY_THRESHOLD,
+     DEFAULT_TOP_K,
+     DEFAULT_EMBEDDING_MODEL
+ )
+


  class RAGSystem:
      """

@@ -46,12 +55,12 @@ class RAGSystem:
      def __init__(
          self,
          ollama_base_url: str = "http://localhost:11434",
-         embedding_model: str = "nomic-embed-text",
+         embedding_model: str = DEFAULT_EMBEDDING_MODEL,
          llm_model: str = "olmo2:13b",
-         chunk_size: int = 128,
-         chunk_overlap: int = 25,
-         similarity_threshold: float = 0.7,
-         top_k: int = 10
+         chunk_size: int = DEFAULT_CHUNK_SIZE,
+         chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+         similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
+         top_k: int = DEFAULT_TOP_K
      ):
          """
          Initialize the RAG system
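
With the constructor defaults now sourced from config.py, RAGSystem can be built with no chunking or retrieval arguments at all, or with selective overrides. A hedged sketch of both calls; it assumes an Ollama server reachable at the default base URL and uses only parameters visible in the signature above:

from ragadoc.rag_system import RAGSystem

# Everything not passed here falls back to the DEFAULT_* constants in ragadoc/config.py.
rag = RAGSystem(llm_model="olmo2:13b")

# Selective override: smaller chunks and fewer retrieved results, other defaults unchanged.
rag_small = RAGSystem(llm_model="olmo2:13b", chunk_size=64, top_k=5)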

ragadoc/ui_chat.py

Lines changed: 2 additions & 5 deletions

@@ -292,13 +292,10 @@ def render_chat_interface():

      # Show current document info
      if current_chat.document_name:
-         with st.expander("📄 Current Document", expanded=False):
-             st.write(f"**Document:** {current_chat.document_name}")
-
-             # Show RAG processing status
+         with st.expander(f"📄 {current_chat.document_name}", expanded=False):
+             # Show RAG processing statistics
              if current_chat.rag_processed:
                  rag_stats = current_chat.rag_stats or {}
-                 st.success("✅ Processed with RAG system")
                  col1, col2 = st.columns(2)
                  with col1:
                      st.metric("Chunks Created", rag_stats.get("total_chunks", 0))

ragadoc/ui_config.py

Lines changed: 13 additions & 17 deletions

@@ -92,24 +92,20 @@ def add_logo_and_title():
      logo_path = Path(__file__).parent.parent / "assets" / "logo.png"

      if logo_path.exists():
-         # Create a centered header with logo and title
-         col1, col2, col3 = st.columns([1, 2, 1])
-
-         with col2:
-             # Logo and title in the center column
-             st.markdown("""
-             <div class="logo-header">
-                 <img src="data:image/png;base64,{}" alt="ragadoc logo">
-                 <h1 style="margin: 0; font-size: 2.5rem;">ragadoc - AI Document Assistant</h1>
-                 <p style="margin: 0.5rem 0 0 0; font-style: italic; color: #b3b3b3;">
-                     Ask questions about your documents - get grounded answers with citations and highlights.
-                 </p>
-             </div>
-             """.format(get_base64_logo(logo_path)), unsafe_allow_html=True)
+         # Compact header with logo and title using proper CSS classes for styling
+         st.markdown("""
+         <div class="logo-header" style="margin-bottom: 1rem;">
+             <img src="data:image/png;base64,{}" alt="ragadoc logo" style="width: 70px; height: auto;">
+             <h1 style="margin: 0.5rem 0 0 0; font-size: 2rem;">ragadoc - AI Document Assistant</h1>
+             <p style="margin: 0.3rem 0 0 0; font-style: italic; color: #b3b3b3; font-size: 0.9rem;">
+                 Ask questions about your documents - get grounded answers with citations and highlights.
+             </p>
+         </div>
+         """.format(get_base64_logo(logo_path)), unsafe_allow_html=True)
      else:
-         # Fallback to text-only title if logo not found
-         st.title("ragadoc - AI Document Assistant")
-         st.markdown("*Ask questions about your documents - get grounded answers with citations and highlights*", unsafe_allow_html=True)
+         # Fallback to text-only title with proper gradient styling
+         st.markdown("<h1 style='font-size: 2rem; margin: 0 0 0.5rem 0; text-align: center;'>ragadoc - AI Document Assistant</h1>", unsafe_allow_html=True)
+         st.markdown("<p style='margin: 0 0 1rem 0; font-style: italic; color: #b3b3b3; font-size: 0.9rem; text-align: center;'>Ask questions about your documents - get grounded answers with citations and highlights.</p>", unsafe_allow_html=True)


  def get_base64_logo(logo_path):

ragadoc/ui_session.py

Lines changed: 2 additions & 8 deletions

@@ -13,6 +13,7 @@
  from .llm_interface import LLMInterface
  from .rag_system import create_rag_system
  from .ui_config import get_ollama_base_url, is_running_in_docker
+ from .config import DEFAULT_RAG_CONFIG


  def init_session_state():

@@ -46,14 +47,7 @@ def init_session_state():

      # RAG configuration
      if "rag_config" not in st.session_state:
-         st.session_state.rag_config = {
-             "chunk_size": 256,
-             "chunk_overlap": 25,
-             "similarity_threshold": 0.7,
-             "top_k": 10,
-             "embedding_model": "nomic-embed-text",
-             "llm_model": None
-         }
+         st.session_state.rag_config = DEFAULT_RAG_CONFIG.copy()

      # Initialize RAG system
      if "rag_system" not in st.session_state:

ragadoc/ui_sidebar.py

Lines changed: 10 additions & 4 deletions

@@ -10,6 +10,12 @@

  from .ui_config import is_running_in_docker, get_ollama_base_url
  from .ui_session import init_rag_system
+ from .config import (
+     CHUNK_SIZE_RANGE, CHUNK_SIZE_STEP,
+     CHUNK_OVERLAP_RANGE, CHUNK_OVERLAP_STEP,
+     SIMILARITY_THRESHOLD_RANGE, SIMILARITY_THRESHOLD_STEP,
+     TOP_K_RANGE, TOP_K_STEP
+ )


  def render_sidebar():

@@ -80,10 +86,10 @@ def render_sidebar():
      if expert_mode:
          with st.expander("🔍 RAG Settings", expanded=False):
              # RAG parameters (excluding embedding model which is now global)
-             chunk_size = st.slider("Chunk Size (tokens)", 32, 1024, st.session_state.rag_config["chunk_size"], 64)
-             chunk_overlap = st.slider("Chunk Overlap (tokens)", 0, 200, st.session_state.rag_config["chunk_overlap"], 10)
-             similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, st.session_state.rag_config["similarity_threshold"], 0.05)
-             top_k = st.slider("Max Retrieved Chunks", 1, 20, st.session_state.rag_config["top_k"], 1)
+             chunk_size = st.slider("Chunk Size (tokens)", CHUNK_SIZE_RANGE[0], CHUNK_SIZE_RANGE[1], st.session_state.rag_config["chunk_size"], CHUNK_SIZE_STEP)
+             chunk_overlap = st.slider("Chunk Overlap (tokens)", CHUNK_OVERLAP_RANGE[0], CHUNK_OVERLAP_RANGE[1], st.session_state.rag_config["chunk_overlap"], CHUNK_OVERLAP_STEP)
+             similarity_threshold = st.slider("Similarity Threshold", SIMILARITY_THRESHOLD_RANGE[0], SIMILARITY_THRESHOLD_RANGE[1], st.session_state.rag_config["similarity_threshold"], SIMILARITY_THRESHOLD_STEP)
+             top_k = st.slider("Max Retrieved Chunks", TOP_K_RANGE[0], TOP_K_RANGE[1], st.session_state.rag_config["top_k"], TOP_K_STEP)

              # Update configuration if changed (excluding embedding model)
              new_config = {
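
Because the slider bounds live in (min, max) tuples, the same wiring can also be expressed with tuple unpacking. This is an equivalent sketch, not what the commit ships, and it uses DEFAULT_CHUNK_SIZE as the initial value instead of the session-state lookup above:

import streamlit as st
from ragadoc.config import CHUNK_SIZE_RANGE, CHUNK_SIZE_STEP, DEFAULT_CHUNK_SIZE

# *CHUNK_SIZE_RANGE expands into the positional min_value and max_value arguments.
chunk_size = st.slider(
    "Chunk Size (tokens)",
    *CHUNK_SIZE_RANGE,              # min_value=32, max_value=1024
    value=DEFAULT_CHUNK_SIZE,       # 128
    step=CHUNK_SIZE_STEP,           # 64
)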
