@@ -65,12 +65,15 @@ def deprecated(message: str):
     Args:
         message: The deprecation message to show in the warning.
     """
+
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
             warnings.warn(message, DeprecationWarning, stacklevel=2)
             return func(*args, **kwargs)
+
         return wrapper
+
     return decorator
 
 
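For reference, the decorator this hunk reformats is used like so; a minimal, runnable sketch (the `old_function` example and the warning text are illustrative, not from the PR):

```python
import warnings
from functools import wraps


def deprecated(message: str):
    """Mark a callable as deprecated; callers get a DeprecationWarning."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator


@deprecated("old_function is deprecated; use new_function instead")
def old_function() -> int:
    return 42


warnings.simplefilter("always", DeprecationWarning)
old_function()  # emits a DeprecationWarning with the message above
```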
@@ -113,9 +116,7 @@ async def _startup_backfill() -> None:
             if namespace and project_id:
                 namespaces_projects.add((namespace, project_id))
     except Exception as e:
-        logger.error(
-            "Failed to scan existing data for startup backfill: %s", str(e)
-        )
+        logger.error("Failed to scan existing data for startup backfill: %s", str(e))
         return
 
     logger.info(
@@ -261,18 +262,17 @@ class EmbedResponse(BaseModel):
261262 "yes" ,
262263}
263264# Check if we're in testing mode
264- TESTING_MODE : bool = (
265- "pytest" in sys .modules or
266- os .environ .get ("TESTING" , "" ).lower () in {"1" , "true" , "yes" }
267- )
265+ TESTING_MODE : bool = "pytest" in sys .modules or os .environ .get (
266+ "TESTING" , ""
267+ ).lower () in {"1" , "true" , "yes" }
268268
269269API_KEY = os .environ .get ("CF_API_KEY" )
270270
271271# Validate required configuration
272272if not API_KEY or not API_KEY .strip ():
273273 if TESTING_MODE :
274274 # Use dummy key for testing
275- API_KEY = "test-key"
275+ API_KEY = "test-key" # pragma: allowlist secret
276276 logger .info ("Using test API key for testing mode" )
277277 else :
278278 logger .error (
@@ -374,9 +374,7 @@ def _get_env_int(key: str, default: int) -> int:
 BACKFILL_DELAY_SECONDS = 0.1  # Delay between backfill task scheduling
 
 # Search optimization configuration
-FILE_SIZE_WARNING_THRESHOLD = _get_env_int(
-    "CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024
-)
+FILE_SIZE_WARNING_THRESHOLD = _get_env_int("CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024)
 EMBEDDING_CACHE_SIZE = _get_env_int("CF_EMBEDDING_CACHE_SIZE", 10000)
 EMBEDDING_DIMENSION = 32  # Derived from _embed_text slice length (digest[:32])
 
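The body of `_get_env_int` is not part of this diff; assuming the obvious behavior from its signature and these call sites, it likely looks something like the sketch below (the ValueError fallback is a guess):

```python
import os


def _get_env_int(key: str, default: int) -> int:
    """Read an integer setting from the environment, else use the default."""
    raw = os.environ.get(key)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


# With CF_FILE_SIZE_THRESHOLD unset, this yields 52428800 (50 MiB).
print(_get_env_int("CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024))
```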
@@ -1046,41 +1044,28 @@ def _schedule_background_backfill(namespace: str, project_id: str) -> str:
         )
         return existing_task
 
-    logger.info(
-        "Scheduling background backfill for %s (task: %s)", key, task_id
-    )
-    future = _io_pool.submit(
-        _safe_backfill_index, namespace, project_id, task_id
-    )
+    logger.info("Scheduling background backfill for %s (task: %s)", key, task_id)
+    future = _io_pool.submit(_safe_backfill_index, namespace, project_id, task_id)
 
     # Add done-callback to clean up completed futures
     def _cleanup_future(fut: Future[None]) -> None:
         with _backfill_tasks_lock:
-            if (
-                task_id in _backfill_tasks
-                and _backfill_tasks[task_id] is fut
-            ):
+            if task_id in _backfill_tasks and _backfill_tasks[task_id] is fut:
                 del _backfill_tasks[task_id]
-                logger.debug(
-                    "Cleaned up completed backfill task %s", task_id
-                )
+                logger.debug("Cleaned up completed backfill task %s", task_id)
 
     future.add_done_callback(_cleanup_future)
     _backfill_tasks[task_id] = future
     return task_id
 
 
-def _safe_backfill_index(
-    namespace: str, project_id: str, task_id: str
-) -> None:
+def _safe_backfill_index(namespace: str, project_id: str, task_id: str) -> None:
     """Safely backfill index with guards and timeout protection."""
     start_time = time.time()
     bucket = DATA_DIR / "store.jsonl"
 
     if not bucket.exists():
-        logger.debug(
-            "No data file found for backfill of %s:%s", namespace, project_id
-        )
+        logger.debug("No data file found for backfill of %s:%s", namespace, project_id)
         return
 
     try:
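The done-callback pattern in the hunk above, isolated as a self-contained sketch: the identity check (`is fut`) ensures a rescheduled task reusing the same id is not evicted by a stale callback. Names here loosely mirror the diff; the sleep is a stand-in for the real backfill work:

```python
import threading
import time
from concurrent.futures import Future, ThreadPoolExecutor

_io_pool = ThreadPoolExecutor(max_workers=2)
_tasks: dict[str, Future[None]] = {}
_tasks_lock = threading.Lock()


def _backfill() -> None:
    time.sleep(0.1)  # stand-in for the real I/O-bound work


def schedule(task_id: str) -> str:
    future = _io_pool.submit(_backfill)

    def _cleanup(fut: Future[None]) -> None:
        with _tasks_lock:
            # Remove the entry only if it still points at this exact future.
            if task_id in _tasks and _tasks[task_id] is fut:
                del _tasks[task_id]

    future.add_done_callback(_cleanup)
    _tasks[task_id] = future
    return task_id


schedule("ns:proj")
_io_pool.shutdown(wait=True)
print(_tasks)  # {} once the callback has fired
```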
@@ -1108,9 +1093,7 @@ def _scan_file() -> tuple[list[str], list[str]]:
                     and data.get("project_id") == project_id
                 ):
                     backfill_ids.append(data["id"])  # type: ignore[index]
-                    backfill_texts.append(
-                        data["text"]
-                    )  # type: ignore[index]
+                    backfill_texts.append(data["text"])  # type: ignore[index]
 
                     # Enforce item limit
                     if len(backfill_ids) >= MAX_BACKFILL_ITEMS:
@@ -1153,12 +1136,8 @@ def _scan_file() -> tuple[list[str], list[str]]:
             )
             break
 
-        batch_ids = missing_ids[
-            batch_start:batch_start + BACKFILL_BATCH_SIZE
-        ]
-        batch_texts = missing_texts[
-            batch_start:batch_start + BACKFILL_BATCH_SIZE
-        ]
+        batch_ids = missing_ids[batch_start : batch_start + BACKFILL_BATCH_SIZE]
+        batch_texts = missing_texts[batch_start : batch_start + BACKFILL_BATCH_SIZE]
 
         # Retry mechanism for batch processing
         success = False
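For context, the slice reformatting above belongs to a batching-with-retry loop. A minimal sketch of that shape (`index_batch`, `MAX_RETRIES`, and the batch size here are illustrative stand-ins, not values from the PR):

```python
BACKFILL_BATCH_SIZE = 2
MAX_RETRIES = 3


def index_batch(ids: list[str], texts: list[str]) -> None:
    # Stand-in for the real embedding/indexing step.
    print(f"indexed {len(ids)} items")


def process_in_batches(ids: list[str], texts: list[str]) -> None:
    """Walk (id, text) pairs in fixed-size batches, retrying each batch."""
    for batch_start in range(0, len(ids), BACKFILL_BATCH_SIZE):
        batch_ids = ids[batch_start : batch_start + BACKFILL_BATCH_SIZE]
        batch_texts = texts[batch_start : batch_start + BACKFILL_BATCH_SIZE]
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                index_batch(batch_ids, batch_texts)
                break
            except RuntimeError:
                if attempt == MAX_RETRIES:
                    raise


process_in_batches(["a", "b", "c"], ["t1", "t2", "t3"])
```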