feat: Configurable batch size #1941
Changes from all commits: d1b13b1, f2166be, e38c33c, 6ec44f6, 7d3450c, 98394fc, b7d5bf5
Changes to the first file (the default and temporal task builders):

```diff
@@ -252,7 +252,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
-    chunks_per_batch: int = 100,
+    chunks_per_batch: int = None,
     **kwargs,
 ) -> list[Task]:
     if config is None:
```
```diff
@@ -272,12 +272,14 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
         "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
     }
 
-    if chunks_per_batch is None:
-        chunks_per_batch = 100
-
     cognify_config = get_cognify_config()
     embed_triplets = cognify_config.triplet_embedding
 
+    if chunks_per_batch is None:
+        chunks_per_batch = (
+            cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
+        )
+
     default_tasks = [
         Task(classify_documents),
         Task(
```
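For readers skimming the diff: the new logic gives an explicitly passed `chunks_per_batch` argument priority, then falls back to the configured `CognifyConfig.chunks_per_batch`, then to the hard-coded default of 100. A minimal standalone sketch of that precedence (the `resolve_chunks_per_batch` helper is illustrative, not part of this PR):

```python
from typing import Optional


def resolve_chunks_per_batch(
    explicit: Optional[int],
    configured: Optional[int],
    default: int = 100,
) -> int:
    """Resolve batch size: explicit argument > configured value > default."""
    if explicit is not None:
        return explicit
    if configured is not None:
        return configured
    return default


# No explicit argument, CHUNKS_PER_BATCH=50 configured: the config wins
assert resolve_chunks_per_batch(None, 50) == 50
# An explicit argument overrides the configuration
assert resolve_chunks_per_batch(25, 50) == 25
# Nothing set anywhere falls back to the default
assert resolve_chunks_per_batch(None, None) == 100
```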
```diff
@@ -308,7 +310,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
 
 
 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
```

Contributor (review comment): Fix type annotation for optional parameter. The parameter `chunks_per_batch: int = None` should use `Optional[int]` for proper type checking, consistent with the `Optional` import at line 3 and the coding guidelines requiring type annotations for public APIs.

Proposed fix:

```diff
-    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: Optional[int] = None
```
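To illustrate why the reviewer's annotation fix matters: mypy rejects implicit `Optional` by default (the `no_implicit_optional` flag has defaulted to on since mypy 0.980), so `int = None` fails type checking while `Optional[int] = None` passes. A small sketch with illustrative function names:

```python
from typing import Optional


def bad(chunks_per_batch: int = None) -> int:
    # mypy error: default "None" is incompatible with declared type "int"
    return chunks_per_batch if chunks_per_batch is not None else 10


def good(chunks_per_batch: Optional[int] = None) -> int:
    # Type-checks cleanly: the annotation admits None explicitly
    return chunks_per_batch if chunks_per_batch is not None else 10
```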
```diff
@@ -330,7 +332,10 @@ async def get_temporal_tasks(
         list[Task]: A list of Task objects representing the temporal processing pipeline.
     """
     if chunks_per_batch is None:
-        chunks_per_batch = 10
+        from cognee.modules.cognify.config import get_cognify_config
+
+        configured = get_cognify_config().chunks_per_batch
+        chunks_per_batch = configured if configured is not None else 10
 
     temporal_tasks = [
         Task(classify_documents),
```

Contributor (review comment on lines 334–338) — Refactor suggestion, Major: Remove redundant import. The lazy import of `get_cognify_config` is redundant here: `get_default_tasks` earlier in the same module already calls it without a local import, so it is available at module level.

Proposed fix:

```diff
     if chunks_per_batch is None:
-        from cognee.modules.cognify.config import get_cognify_config
-
         configured = get_cognify_config().chunks_per_batch
         chunks_per_batch = configured if configured is not None else 10
```
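As a side note on the pattern the reviewer flags: function-level imports are generally reserved for breaking circular imports or deferring expensive modules; when neither applies, a module-level import is the idiomatic form. A hedged sketch of the cleaned-up shape (the `configured_batch_size` helper is hypothetical):

```python
# Module-level import, assuming no circular-import issue forces a lazy import.
from typing import Optional

from cognee.modules.cognify.config import get_cognify_config


def configured_batch_size(default: int = 10) -> int:
    # Read the configured value once; fall back to the caller's default.
    configured: Optional[int] = get_cognify_config().chunks_per_batch
    return configured if configured is not None else default
```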
Changes to the second file (the `CognifyConfig` settings class — `cognee/modules/cognify/config.py`, judging by the import path above):

```diff
@@ -9,13 +9,15 @@ class CognifyConfig(BaseSettings):
     classification_model: object = DefaultContentPrediction
     summarization_model: object = SummarizedContent
     triplet_embedding: bool = False
+    chunks_per_batch: Optional[int] = None
 
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     def to_dict(self) -> dict:
         return {
             "classification_model": self.classification_model,
             "summarization_model": self.summarization_model,
             "triplet_embedding": self.triplet_embedding,
+            "chunks_per_batch": self.chunks_per_batch,
         }
```

Contributor (review comment) — Refactor suggestion, Major: Add field documentation. The new `chunks_per_batch` field should be documented so its purpose, configuration, and defaults are clear to users.

Proposed documentation addition:

```diff
+    """
+    chunks_per_batch: Number of chunks to process per task batch in Cognify.
+    Can be configured via CHUNKS_PER_BATCH environment variable.
+    Higher values (e.g., 50) can improve processing speed for large documents,
+    but may cause max_token errors if set too high. Defaults to 100 for default tasks
+    and 10 for temporal tasks when not specified.
+    """
     chunks_per_batch: Optional[int] = None
```

Alternatively, use pydantic's `Field` with a description:

```diff
-    chunks_per_batch: Optional[int] = None
+    chunks_per_batch: Optional[int] = Field(
+        default=None,
+        description="Number of chunks to process per task batch (configurable via CHUNKS_PER_BATCH env var)",
+    )
```
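Because `CognifyConfig` extends pydantic's `BaseSettings` with `env_file=".env"`, the new field can be set via a `CHUNKS_PER_BATCH` environment variable (pydantic matches field names case-insensitively) without any code change. A self-contained sketch of the same pattern, using a stand-in `BatchConfig` class rather than the real `CognifyConfig`:

```python
import os
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class BatchConfig(BaseSettings):
    # Stand-in for CognifyConfig.chunks_per_batch; None means "use the caller's default"
    chunks_per_batch: Optional[int] = None

    model_config = SettingsConfigDict(env_file=".env", extra="allow")


os.environ["CHUNKS_PER_BATCH"] = "50"
print(BatchConfig().chunks_per_batch)  # 50 — parsed from the environment

del os.environ["CHUNKS_PER_BATCH"]
print(BatchConfig().chunks_per_batch)  # None — the task builders fall back to 100 or 10
```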