3 files changed: +7 −2

@@ -7,6 +7,11 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+## [1.3.2] - 2025-12-04
+
+### Fixed
+- Further reduced default EMBEDDING_BATCH_SIZE from 10 to 3 (batch_size × chunk_tokens must be < 2048)
+
 ## [1.3.1] - 2025-12-04
 
 ### Fixed

@@ -94,7 +94,7 @@ docker compose up -d
 | `OLLAMA_MODEL` | `nomic-embed-text` | Embedding model |
 | `CHUNK_SIZE` | `400` | Target chunk size in tokens |
 | `CHUNK_MAX_TOKENS` | `1500` | Maximum chunk size (safe margin for nomic-embed-text 2048 limit) |
-| `EMBEDDING_BATCH_SIZE` | `10` | Chunks per embedding API call (reduce if Ollama errors) |
+| `EMBEDDING_BATCH_SIZE` | `3` | Chunks per embedding API call (batch_size × chunk_tokens < 2048) |
 
 ## Features
 

@@ -17,7 +17,7 @@
 OLLAMA_URL = os.getenv('OLLAMA_URL', 'http://localhost:11434')
 EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'nomic-embed-text')
 MAX_TOKENS = 2048  # nomic-embed-text context limit (configurable models may differ)
-EMBEDDING_BATCH_SIZE = int(os.getenv('EMBEDDING_BATCH_SIZE', '10'))  # Reduced from 32 to avoid Ollama batch decode errors
+EMBEDDING_BATCH_SIZE = int(os.getenv('EMBEDDING_BATCH_SIZE', '3'))  # Low default: batch_size * avg_chunk_tokens must be < 2048
 
 
 def get_embedding(text: str, timeout: int = 60, max_retries: int = 3) -> Optional[list[float]]:
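To illustrate the invariant this change enforces, here is a minimal sketch of grouping chunks into embedding batches that respect the model's context window. The `make_batches` helper and its `avg_tokens_per_chunk` parameter are hypothetical illustrations, not part of this PR; only `MAX_TOKENS`, `EMBEDDING_BATCH_SIZE`, and the batch_size × chunk_tokens < 2048 rule come from the diff.

```python
# Hypothetical sketch: batch chunks so each embedding request stays under
# the nomic-embed-text 2048-token context limit described in this PR.
MAX_TOKENS = 2048          # model context limit (from the diff)
EMBEDDING_BATCH_SIZE = 3   # new default (from the diff)

def make_batches(chunks: list[str], avg_tokens_per_chunk: int = 400) -> list[list[str]]:
    """Group chunks into batches of at most EMBEDDING_BATCH_SIZE, asserting the
    batch_size * avg_chunk_tokens < MAX_TOKENS invariant the changelog describes."""
    assert EMBEDDING_BATCH_SIZE * avg_tokens_per_chunk < MAX_TOKENS, \
        "batch would exceed the embedding model's context window"
    return [chunks[i:i + EMBEDDING_BATCH_SIZE]
            for i in range(0, len(chunks), EMBEDDING_BATCH_SIZE)]
```

With the default CHUNK_SIZE of 400 tokens, a batch of 3 totals roughly 1200 tokens, which is why 3 is a safe default while 10 (≈4000 tokens) was not.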