Merge branch 'main' into gl/chore/crews-ruff-linting-fixe

greysonlalonde · web-flow · commit c7809515e5ae · 2025-09-19T21:39:40.000-04:00
diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, List, Optional, Union
 
 from pydantic import Field, field_validator
 
@@ -14,19 +13,19 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
     """Base class for knowledge sources that load content from files."""
 
     _logger: Logger = Logger(verbose=True)
-    file_path: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_path: Path | list[Path] | str | list[str] | None = Field(
         default=None,
         description="[Deprecated] The path to the file. Use file_paths instead.",
     )
-    file_paths: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_paths: Path | list[Path] | str | list[str] | None = Field(
         default_factory=list, description="The path to the file"
     )
-    content: Dict[Path, str] = Field(init=False, default_factory=dict)
-    storage: Optional[KnowledgeStorage] = Field(default=None)
-    safe_file_paths: List[Path] = Field(default_factory=list)
+    content: dict[Path, str] = Field(init=False, default_factory=dict)
+    storage: KnowledgeStorage | None = Field(default=None)
+    safe_file_paths: list[Path] = Field(default_factory=list)
 
     @field_validator("file_path", "file_paths", mode="before")
-    def validate_file_path(cls, v, info):
+    def validate_file_path(cls, v, info):  # noqa: N805
         """Validate that at least one of file_path or file_paths is provided."""
         # Single check if both are None, O(1) instead of nested conditions
         if (
@@ -46,9 +45,8 @@ def model_post_init(self, _):
         self.content = self.load_content()
 
     @abstractmethod
-    def load_content(self) -> Dict[Path, str]:
+    def load_content(self) -> dict[Path, str]:
         """Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory."""
-        pass
 
     def validate_content(self):
         """Validate the paths."""
@@ -74,11 +72,11 @@ def _save_documents(self):
         else:
             raise ValueError("No storage found to save documents.")
 
-    def convert_to_path(self, path: Union[Path, str]) -> Path:
+    def convert_to_path(self, path: Path | str) -> Path:
         """Convert a path to a Path object."""
         return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
 
-    def _process_file_paths(self) -> List[Path]:
+    def _process_file_paths(self) -> list[Path]:
         """Convert file_path to a list of Path objects."""
 
         if hasattr(self, "file_path") and self.file_path is not None:
@@ -93,7 +91,7 @@ def _process_file_paths(self) -> List[Path]:
             raise ValueError("Your source must be provided with a file_paths: []")
 
         # Convert single path to list
-        path_list: List[Union[Path, str]] = (
+        path_list: list[Path | str] = (
             [self.file_paths]
             if isinstance(self.file_paths, (str, Path))
             else list(self.file_paths)
diff --git a/src/crewai/knowledge/source/base_knowledge_source.py b/src/crewai/knowledge/source/base_knowledge_source.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import numpy as np
 from pydantic import BaseModel, ConfigDict, Field
@@ -12,29 +12,27 @@ class BaseKnowledgeSource(BaseModel, ABC):
 
     chunk_size: int = 4000
     chunk_overlap: int = 200
-    chunks: List[str] = Field(default_factory=list)
-    chunk_embeddings: List[np.ndarray] = Field(default_factory=list)
+    chunks: list[str] = Field(default_factory=list)
+    chunk_embeddings: list[np.ndarray] = Field(default_factory=list)
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: Optional[KnowledgeStorage] = Field(default=None)
-    metadata: Dict[str, Any] = Field(default_factory=dict)  # Currently unused
-    collection_name: Optional[str] = Field(default=None)
+    storage: KnowledgeStorage | None = Field(default=None)
+    metadata: dict[str, Any] = Field(default_factory=dict)  # Currently unused
+    collection_name: str | None = Field(default=None)
 
     @abstractmethod
     def validate_content(self) -> Any:
         """Load and preprocess content from the source."""
-        pass
 
     @abstractmethod
     def add(self) -> None:
         """Process content, chunk it, compute embeddings, and save them."""
-        pass
 
-    def get_embeddings(self) -> List[np.ndarray]:
+    def get_embeddings(self) -> list[np.ndarray]:
         """Return the list of embeddings for the chunks."""
         return self.chunk_embeddings
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py
@@ -1,13 +1,21 @@
+from collections.abc import Iterator
 from pathlib import Path
-from typing import Iterator, List, Optional, Union
 from urllib.parse import urlparse
 
 try:
-    from docling.datamodel.base_models import InputFormat
-    from docling.document_converter import DocumentConverter
-    from docling.exceptions import ConversionError
-    from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
-    from docling_core.types.doc.document import DoclingDocument
+    from docling.datamodel.base_models import (  # type: ignore[import-not-found]
+        InputFormat,
+    )
+    from docling.document_converter import (  # type: ignore[import-not-found]
+        DocumentConverter,
+    )
+    from docling.exceptions import ConversionError  # type: ignore[import-not-found]
+    from docling_core.transforms.chunker.hierarchical_chunker import (  # type: ignore[import-not-found]
+        HierarchicalChunker,
+    )
+    from docling_core.types.doc.document import (  # type: ignore[import-not-found]
+        DoclingDocument,
+    )
 
     DOCLING_AVAILABLE = True
 except ImportError:
@@ -35,11 +43,11 @@ def __init__(self, *args, **kwargs):
 
     _logger: Logger = Logger(verbose=True)
 
-    file_path: Optional[List[Union[Path, str]]] = Field(default=None)
-    file_paths: List[Union[Path, str]] = Field(default_factory=list)
-    chunks: List[str] = Field(default_factory=list)
-    safe_file_paths: List[Union[Path, str]] = Field(default_factory=list)
-    content: List["DoclingDocument"] = Field(default_factory=list)
+    file_path: list[Path | str] | None = Field(default=None)
+    file_paths: list[Path | str] = Field(default_factory=list)
+    chunks: list[str] = Field(default_factory=list)
+    safe_file_paths: list[Path | str] = Field(default_factory=list)
+    content: list["DoclingDocument"] = Field(default_factory=list)
     document_converter: "DocumentConverter" = Field(
         default_factory=lambda: DocumentConverter(
             allowed_formats=[
@@ -66,7 +74,7 @@ def model_post_init(self, _) -> None:
         self.safe_file_paths = self.validate_content()
         self.content = self._load_content()
 
-    def _load_content(self) -> List["DoclingDocument"]:
+    def _load_content(self) -> list["DoclingDocument"]:
         try:
             return self._convert_source_to_docling_documents()
         except ConversionError as e:
@@ -88,7 +96,7 @@ def add(self) -> None:
             self.chunks.extend(list(new_chunks_iterable))
         self._save_documents()
 
-    def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]:
+    def _convert_source_to_docling_documents(self) -> list["DoclingDocument"]:
         conv_results_iter = self.document_converter.convert_all(self.safe_file_paths)
         return [result.document for result in conv_results_iter]
 
@@ -97,8 +105,8 @@ def _chunk_doc(self, doc: "DoclingDocument") -> Iterator[str]:
         for chunk in chunker.chunk(doc):
             yield chunk.text
 
-    def validate_content(self) -> List[Union[Path, str]]:
-        processed_paths: List[Union[Path, str]] = []
+    def validate_content(self) -> list[Path | str]:
+        processed_paths: list[Path | str] = []
         for path in self.file_paths:
             if isinstance(path, str):
                 if path.startswith(("http://", "https://")):
@@ -108,7 +116,7 @@ def validate_content(self) -> List[Union[Path, str]]:
                         else:
                             raise ValueError(f"Invalid URL format: {path}")
                     except Exception as e:
-                        raise ValueError(f"Invalid URL: {path}. Error: {str(e)}")
+                        raise ValueError(f"Invalid URL: {path}. Error: {e!s}") from e
                 else:
                     local_path = Path(KNOWLEDGE_DIRECTORY + "/" + path)
                     if local_path.exists():
diff --git a/src/crewai/knowledge/source/csv_knowledge_source.py b/src/crewai/knowledge/source/csv_knowledge_source.py
@@ -1,14 +1,13 @@
 import csv
 from pathlib import Path
-from typing import Dict, List
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
 
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries CSV file content using embeddings."""
 
-    def load_content(self) -> Dict[Path, str]:
+    def load_content(self) -> dict[Path, str]:
         """Load and preprocess CSV file content."""
         content_dict = {}
         for file_path in self.safe_file_paths:
@@ -32,7 +31,7 @@ def add(self) -> None:
         self.chunks.extend(new_chunks)
         self._save_documents()
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -1,6 +1,4 @@
 from pathlib import Path
-from typing import Dict, Iterator, List, Optional, Union
-from urllib.parse import urlparse
 
 from pydantic import Field, field_validator
 
@@ -16,19 +14,19 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
 
     _logger: Logger = Logger(verbose=True)
 
-    file_path: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_path: Path | list[Path] | str | list[str] | None = Field(
         default=None,
         description="[Deprecated] The path to the file. Use file_paths instead.",
     )
-    file_paths: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_paths: Path | list[Path] | str | list[str] | None = Field(
         default_factory=list, description="The path to the file"
     )
-    chunks: List[str] = Field(default_factory=list)
-    content: Dict[Path, Dict[str, str]] = Field(default_factory=dict)
-    safe_file_paths: List[Path] = Field(default_factory=list)
+    chunks: list[str] = Field(default_factory=list)
+    content: dict[Path, dict[str, str]] = Field(default_factory=dict)
+    safe_file_paths: list[Path] = Field(default_factory=list)
 
     @field_validator("file_path", "file_paths", mode="before")
-    def validate_file_path(cls, v, info):
+    def validate_file_path(cls, v, info):  # noqa: N805
         """Validate that at least one of file_path or file_paths is provided."""
         # Single check if both are None, O(1) instead of nested conditions
         if (
@@ -41,7 +39,7 @@ def validate_file_path(cls, v, info):
             raise ValueError("Either file_path or file_paths must be provided")
         return v
 
-    def _process_file_paths(self) -> List[Path]:
+    def _process_file_paths(self) -> list[Path]:
         """Convert file_path to a list of Path objects."""
 
         if hasattr(self, "file_path") and self.file_path is not None:
@@ -56,7 +54,7 @@ def _process_file_paths(self) -> List[Path]:
             raise ValueError("Your source must be provided with a file_paths: []")
 
         # Convert single path to list
-        path_list: List[Union[Path, str]] = (
+        path_list: list[Path | str] = (
             [self.file_paths]
             if isinstance(self.file_paths, (str, Path))
             else list(self.file_paths)
@@ -100,7 +98,7 @@ def model_post_init(self, _) -> None:
         self.validate_content()
         self.content = self._load_content()
 
-    def _load_content(self) -> Dict[Path, Dict[str, str]]:
+    def _load_content(self) -> dict[Path, dict[str, str]]:
         """Load and preprocess Excel file content from multiple sheets.
 
         Each sheet's content is converted to CSV format and stored.
@@ -126,21 +124,21 @@ def _load_content(self) -> Dict[Path, Dict[str, str]]:
             content_dict[file_path] = sheet_dict
         return content_dict
 
-    def convert_to_path(self, path: Union[Path, str]) -> Path:
+    def convert_to_path(self, path: Path | str) -> Path:
         """Convert a path to a Path object."""
         return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
 
     def _import_dependencies(self):
         """Dynamically import dependencies."""
         try:
-            import pandas as pd
+            import pandas as pd  # type: ignore[import-untyped,import-not-found]
 
             return pd
         except ImportError as e:
             missing_package = str(e).split()[-1]
             raise ImportError(
                 f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
-            )
+            ) from e
 
     def add(self) -> None:
         """
@@ -161,7 +159,7 @@ def add(self) -> None:
         self.chunks.extend(new_chunks)
         self._save_documents()
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/knowledge/source/json_knowledge_source.py b/src/crewai/knowledge/source/json_knowledge_source.py
@@ -1,16 +1,16 @@
 import json
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
 
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries JSON file content using embeddings."""
 
-    def load_content(self) -> Dict[Path, str]:
+    def load_content(self) -> dict[Path, str]:
         """Load and preprocess JSON file content."""
-        content: Dict[Path, str] = {}
+        content: dict[Path, str] = {}
         for path in self.safe_file_paths:
             path = self.convert_to_path(path)
             with open(path, "r", encoding="utf-8") as json_file:
@@ -29,7 +29,7 @@ def _json_to_text(self, data: Any, level: int = 0) -> str:
             for item in data:
                 text += f"{indent}- {self._json_to_text(item, level + 1)}\n"
         else:
-            text += f"{str(data)}"
+            text += f"{data!s}"
         return text
 
     def add(self) -> None:
@@ -44,7 +44,7 @@ def add(self) -> None:
         self.chunks.extend(new_chunks)
         self._save_documents()
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/knowledge/source/pdf_knowledge_source.py b/src/crewai/knowledge/source/pdf_knowledge_source.py
@@ -1,13 +1,12 @@
 from pathlib import Path
-from typing import Dict, List
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
 
 class PDFKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries PDF file content using embeddings."""
 
-    def load_content(self) -> Dict[Path, str]:
+    def load_content(self) -> dict[Path, str]:
         """Load and preprocess PDF file content."""
         pdfplumber = self._import_pdfplumber()
 
@@ -30,22 +29,22 @@ def _import_pdfplumber(self):
             import pdfplumber
 
             return pdfplumber
-        except ImportError:
+        except ImportError as e:
             raise ImportError(
                 "pdfplumber is not installed. Please install it with: pip install pdfplumber"
-            )
+            ) from e
 
     def add(self) -> None:
         """
         Add PDF file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        for _, text in self.content.items():
+        for text in self.content.values():
             new_chunks = self._chunk_text(text)
             self.chunks.extend(new_chunks)
         self._save_documents()
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/knowledge/source/string_knowledge_source.py b/src/crewai/knowledge/source/string_knowledge_source.py
diff --git a/src/crewai/knowledge/source/text_file_knowledge_source.py b/src/crewai/knowledge/source/text_file_knowledge_source.py