Commit e8c05eb

fix(lib): fix Ollama context size and fetch credentials for authentication

1 parent 25fec0b · commit e8c05eb

5 files changed: +23 -15 lines

CHANGELOG.md

Lines changed: 4 additions & 0 deletions

```diff
@@ -7,6 +7,10 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### Fixed
+- Frontend API calls now include credentials for proper session authentication
+- Ollama context size reduced from 8192 to 2048 to match nomic-embed-text model limit
+
 ## [1.1.2] - 2025-12-02
 
 ### Changed
```

frontend/static/app.js

Lines changed: 9 additions & 6 deletions

```diff
@@ -64,7 +64,7 @@ function ragifyApp() {
         // Collections
         async loadCollections() {
             try {
-                const res = await fetch('/api/collections');
+                const res = await fetch('/api/collections', { credentials: 'include' });
                 const data = await res.json();
                 console.log('API response:', data);
                 this.collections = data.collections || [];
@@ -99,7 +99,8 @@ function ragifyApp() {
                 const res = await fetch('/api/collections', {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
-                    body: JSON.stringify({ name: this.newCollectionName })
+                    body: JSON.stringify({ name: this.newCollectionName }),
+                    credentials: 'include'
                 });
 
                 if (res.ok) {
@@ -120,7 +121,7 @@ function ragifyApp() {
             if (!confirm(`Delete collection "${name}"? This cannot be undone.`)) return;
 
             try {
-                const res = await fetch(`/api/collections/${name}`, { method: 'DELETE' });
+                const res = await fetch(`/api/collections/${name}`, { method: 'DELETE', credentials: 'include' });
                 if (res.ok) {
                     this.showToast('Collection deleted', 'success');
                     await this.loadCollections();
@@ -146,7 +147,7 @@ function ragifyApp() {
         // Jobs
         async loadJobs() {
             try {
-                const res = await fetch('/api/jobs?limit=10');
+                const res = await fetch('/api/jobs?limit=10', { credentials: 'include' });
                 const data = await res.json();
                 this.jobs = data.jobs || [];
             } catch (e) {
@@ -189,7 +190,8 @@ function ragifyApp() {
 
             const res = await fetch('/api/upload', {
                 method: 'POST',
-                body: formData
+                body: formData,
+                credentials: 'include'
             });
 
             if (res.ok) {
@@ -222,7 +224,8 @@ function ragifyApp() {
                     query: this.searchQuery,
                     collection: this.searchCollection,
                     limit: 10
-                })
+                }),
+                credentials: 'include'
             });
 
             if (res.ok) {
```
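Note: `credentials: 'include'` makes `fetch()` attach the session cookie to every request, which the API needs to authenticate the caller. Outside the browser, the equivalent is a cookie-persisting HTTP session. A minimal sketch below: the `/api/collections` and `/api/jobs` paths come from this diff, but the base URL and the `/login` route that sets the cookie are assumptions, since the auth endpoint is not shown in this commit.

```python
import requests

BASE = "http://localhost:8000"  # assumed dev-server address, not from the diff

with requests.Session() as s:
    # Hypothetical login route that sets the session cookie.
    s.post(f"{BASE}/login", data={"username": "demo", "password": "demo"})

    # Same endpoints the frontend calls; the Session resends the cookie
    # automatically, just as credentials: 'include' does in the browser.
    collections = s.get(f"{BASE}/api/collections").json()
    jobs = s.get(f"{BASE}/api/jobs", params={"limit": 10}).json()
    print(collections.get("collections", []), jobs.get("jobs", []))
```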

lib/chunking.py

Lines changed: 5 additions & 5 deletions

```diff
@@ -212,13 +212,13 @@ def _fallback_chunk(
     return final_chunks
 
 
-def validate_chunk_size(chunk_text: str, max_tokens: int = 8192) -> bool:
+def validate_chunk_size(chunk_text: str, max_tokens: int = 2048) -> bool:
     """
     Validate that chunk doesn't exceed embedding model's token limit.
-
+
     Args:
         chunk_text: Chunk text to validate
-        max_tokens: Maximum allowed tokens (nomic-embed-text: 8192)
+        max_tokens: Maximum allowed tokens (nomic-embed-text: 2048)
 
     Returns:
         True if chunk is within limits
@@ -232,7 +232,7 @@ def create_chunks(
     chunk_size: int = 500,
     chunk_overlap: int = 50,
     min_tokens: int = 0,
-    max_tokens: int = 8192
+    max_tokens: int = 2048
 ) -> list[dict]:
     """
     Create chunks from text using two-level semantic chunking (chonkie + semchunk).
@@ -247,7 +247,7 @@ def create_chunks(
         chunk_size: Target chunk size in tokens (default: 500)
         chunk_overlap: Overlap between chunks in tokens (default: 50)
         min_tokens: Minimum chunk size to keep (default: 50)
-        max_tokens: Maximum chunk size before re-chunking (default: 8192)
+        max_tokens: Maximum chunk size before re-chunking (default: 2048)
 
     Returns:
         List of chunk dictionaries with text and metadata
```
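With the new defaults, chunks produced by `create_chunks` should pass `validate_chunk_size` without either call site naming the limit. A minimal usage sketch; it assumes the functions are importable from `lib.chunking`, that the first positional parameter (not visible in the hunk) is the input text, and that each chunk dict exposes its text under a `"text"` key:

```python
from lib.chunking import create_chunks, validate_chunk_size  # assumed import path

with open("docs/sample.txt") as f:  # any long document; path is illustrative
    long_text = f.read()

# max_tokens now defaults to 2048, so chunks should respect the
# nomic-embed-text limit without passing it explicitly.
chunks = create_chunks(long_text, chunk_size=500, chunk_overlap=50)

# validate_chunk_size shares the same 2048 default ceiling.
assert all(validate_chunk_size(chunk["text"]) for chunk in chunks)  # "text" key assumed
```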

lib/config.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -39,7 +39,7 @@ class ChunkingConfig(BaseModel):
     strategies: ChunkingStrategyConfig = Field(default_factory=ChunkingStrategyConfig)
     chunk_size: int = Field(default=512, description="Target chunk size in tokens")
     overlap: int = Field(default=50, description="Overlap between chunks in tokens")
-    max_tokens: int = Field(default=8192, description="Maximum tokens per chunk")
+    max_tokens: int = Field(default=2048, description="Maximum tokens per chunk (nomic-embed-text limit)")
 
 
 class EmbeddingConfig(BaseModel):
@@ -266,7 +266,7 @@ def merge_cli_args(config: RagifyConfig, args: dict) -> RagifyConfig:
         default: semantic
         chunk_size: 512
         overlap: 50
-        max_tokens: 8192
+        max_tokens: 2048
 
       embedding:
         provider: ollama
```
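Since `max_tokens` is a plain pydantic `Field` default, the change applies to any config that does not set the key explicitly, while a YAML value can still override it. A quick check, assuming `ChunkingConfig` is importable from `lib.config` (the import path is an assumption):

```python
from lib.config import ChunkingConfig  # assumed import path

cfg = ChunkingConfig()
print(cfg.max_tokens)  # 2048, the new default

# An explicit value still overrides the default, e.g. one loaded from YAML.
print(ChunkingConfig(max_tokens=1024).max_tokens)  # 1024
```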

lib/embedding.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -16,7 +16,7 @@
 # Configuration
 OLLAMA_URL = os.getenv('OLLAMA_URL', 'http://localhost:11434')
 EMBEDDING_MODEL = "nomic-embed-text"
-MAX_TOKENS = 8192  # nomic-embed-text limit
+MAX_TOKENS = 2048  # nomic-embed-text context limit
 
 
 def get_embedding(text: str, timeout: int = 60, max_retries: int = 3) -> Optional[list[float]]:
@@ -46,7 +46,8 @@ def get_embedding(text: str, timeout: int = 60, max_retries: int = 3) -> Optional[list[float]]:
             f"{OLLAMA_URL}/api/embeddings",
             json={
                 "model": EMBEDDING_MODEL,
-                "prompt": text
+                "prompt": text,
+                "options": {"num_ctx": MAX_TOKENS}
             },
             timeout=timeout
         )
```
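Without `options.num_ctx`, Ollama sizes the context window from its own defaults, so the request and the model limit can disagree. The sketch below isolates the fixed request: it mirrors the diff but drops `get_embedding()`'s retry loop; the `{"embedding": [...]}` response shape is Ollama's `/api/embeddings` contract and the 768-dimension output is a property of nomic-embed-text, neither of which this diff states.

```python
import requests

OLLAMA_URL = "http://localhost:11434"  # same default as lib/embedding.py

resp = requests.post(
    f"{OLLAMA_URL}/api/embeddings",
    json={
        "model": "nomic-embed-text",
        "prompt": "The quick brown fox jumps over the lazy dog.",
        # The fix: cap the context window at the model limit instead of
        # relying on Ollama's default sizing.
        "options": {"num_ctx": 2048},
    },
    timeout=60,
)
resp.raise_for_status()
embedding = resp.json()["embedding"]  # /api/embeddings responds with {"embedding": [...]}
print(len(embedding))  # 768 dimensions for nomic-embed-text
```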
