Add AWS Bedrock embedder (agno-agi#3075)

kausmeows · dirkbrnd · Lockeysama · commit 8e96ee62b67e · 2025-05-09T16:13:38.000+08:00
## Summary Adds an AWS Bedrock embedder (default model: Cohere Embed Multilingual v3, `cohere.embed-multilingual-v3`). ![image](https://github.com/user-attachments/assets/751c5ded-1519-40a2-9ebd-5291be855bbf) (If applicable, issue number: #____) ## Type of change - [ ] Bug fix - [x] New feature - [ ] Breaking change - [ ] Improvement - [ ] Model update - [ ] Other: --- ## Checklist - [ ] Code complies with style guidelines - [ ] Ran format/validation scripts (`./scripts/format.sh` and `./scripts/validate.sh`) - [ ] Self-review completed - [ ] Documentation updated (comments, docstrings) - [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable) - [ ] Tested in clean environment - [ ] Tests added/updated (if applicable) --- ## Additional Notes Add any important context (deployment instructions, screenshots, security considerations, etc.) --------- Co-authored-by: Dirk Brand <dirkbrnd@gmail.com>
diff --git a/cookbook/agent_concepts/context/__init__.py b/cookbook/agent_concepts/context/__init__.py
diff --git a/cookbook/agent_concepts/knowledge/embedders/aws_bedrock_embedder.py b/cookbook/agent_concepts/knowledge/embedders/aws_bedrock_embedder.py
@@ -0,0 +1,25 @@
+from agno.document.reader.pdf_reader import PDFUrlReader
+from agno.embedder.aws_bedrock import AwsBedrockEmbedder
+from agno.knowledge.pdf_url import PDFUrlKnowledgeBase
+from agno.vectordb.pgvector import PgVector
+
+# Embed one sentence and preview the resulting vector and its length.
+sample_text = "The quick brown fox jumps over the lazy dog."
+embeddings = AwsBedrockEmbedder().get_embedding(sample_text)
+print(f"Embeddings: {embeddings[:5]}")
+print(f"Dimensions: {len(embeddings)}")
+
+# Example usage: load a PDF into a PgVector table using the Bedrock embedder.
+# chunk_size=2048 is required because cohere has a fixed size of 2048.
+pdf_reader = PDFUrlReader(chunk_size=2048)
+vector_db = PgVector(
+    table_name="recipes",
+    db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
+    embedder=AwsBedrockEmbedder(),
+)
+knowledge_base = PDFUrlKnowledgeBase(
+    urls=["https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"],
+    reader=pdf_reader,
+    vector_db=vector_db,
+)
+knowledge_base.load(recreate=False)
diff --git a/cookbook/agent_concepts/state/__init__.py b/cookbook/agent_concepts/state/__init__.py
diff --git a/libs/agno/agno/document/chunking/fixed.py b/libs/agno/agno/document/chunking/fixed.py
@@ -22,7 +22,6 @@ def chunk(self, document: Document) -> List[Document]:
         chunked_documents: List[Document] = []
         chunk_number = 1
         chunk_meta_data = document.meta_data
-
         start = 0
         while start + self.overlap < content_length:
             end = min(start + self.chunk_size, content_length)
@@ -55,5 +54,4 @@ def chunk(self, document: Document) -> List[Document]:
             )
             chunk_number += 1
             start = end - self.overlap
-
         return chunked_documents
diff --git a/libs/agno/agno/document/reader/base.py b/libs/agno/agno/document/reader/base.py
@@ -1,6 +1,6 @@
 import asyncio
 from dataclasses import dataclass, field
-from typing import Any, List
+from typing import Any, List, Optional
 
 from agno.document.base import Document
 from agno.document.chunking.fixed import FixedSizeChunking
@@ -12,9 +12,13 @@ class Reader:
     """Base class for reading documents"""
 
     chunk: bool = True
-    chunk_size: int = 3000
+    chunk_size: int = 5000
     separators: List[str] = field(default_factory=lambda: ["\n", "\n\n", "\r", "\r\n", "\n\r", "\t", " ", "  "])
-    chunking_strategy: ChunkingStrategy = field(default_factory=FixedSizeChunking)
+    chunking_strategy: Optional[ChunkingStrategy] = None
+
+    def __init__(self, chunk_size: int = 5000, chunking_strategy: Optional[ChunkingStrategy] = None) -> None:
+        self.chunk_size = chunk_size  # NOTE(review): chunk and separators are not assigned here; confirm instances still receive their defaults
+        self.chunking_strategy = chunking_strategy or FixedSizeChunking(chunk_size=self.chunk_size)  # fall back to fixed-size chunks of chunk_size
 
     def read(self, obj: Any) -> List[Document]:
         raise NotImplementedError
@@ -23,7 +27,7 @@ async def async_read(self, obj: Any) -> List[Document]:
         raise NotImplementedError
 
     def chunk_document(self, document: Document) -> List[Document]:
-        return self.chunking_strategy.chunk(document)
+        return self.chunking_strategy.chunk(document)  # type: ignore  # chunking_strategy is Optional; a None value raises AttributeError here
 
     async def chunk_documents_async(self, documents: List[Document]) -> List[Document]:
         """
diff --git a/libs/agno/agno/document/reader/firecrawl_reader.py b/libs/agno/agno/document/reader/firecrawl_reader.py
@@ -11,12 +11,18 @@
 except ImportError:
     raise ImportError("The `firecrawl` package is not installed. Please install it via `pip install firecrawl-py`.")
 
-
 @dataclass
 class FirecrawlReader(Reader):
     api_key: Optional[str] = None
     params: Optional[Dict] = None
     mode: Literal["scrape", "crawl"] = "scrape"
+    
+    def __init__(self, api_key: Optional[str] = None, params: Optional[Dict] = None, mode: Literal["scrape", "crawl"] = "scrape", *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)  # forward the remaining arguments to Reader.__init__
+        self.api_key = api_key
+        self.params = params
+        self.mode = mode
+
 
     def scrape(self, url: str) -> List[Document]:
         """
diff --git a/libs/agno/agno/embedder/aws_bedrock.py b/libs/agno/agno/embedder/aws_bedrock.py
@@ -0,0 +1,212 @@
+import json
+from dataclasses import dataclass
+from os import getenv
+from typing import Any, Dict, List, Optional, Tuple
+
+from agno.embedder.base import Embedder
+from agno.exceptions import AgnoError, ModelProviderError
+from agno.utils.log import log_error, logger
+
+try:
+    from boto3 import client as AwsClient
+    from boto3.session import Session
+    from botocore.exceptions import ClientError
+except ImportError:
+    log_error("`boto3` not installed. Please install it via `pip install boto3`.")
+    raise
+
+
+@dataclass
+class AwsBedrockEmbedder(Embedder):
+    """
+    AWS Bedrock embedder.
+
+    To use this embedder, you need to either:
+    1. Set the following environment variables:
+       - AWS_ACCESS_KEY_ID
+       - AWS_SECRET_ACCESS_KEY
+       - AWS_REGION
+    2. Or provide a boto3 Session object
+
+    Args:
+        id (str): The model ID to use. Default is 'cohere.embed-multilingual-v3'.
+        dimensions (int): The dimensions of the embeddings. Default is 1024.
+        input_type (str): Prepends special tokens to differentiate types. Options:
+            'search_document', 'search_query', 'classification', 'clustering'. Default is 'search_query'.
+        truncate (Optional[str]): How to handle inputs longer than the maximum token length.
+            Options: 'NONE', 'START', 'END'. Default is None, which omits the field from the request.
+        embedding_types (Optional[List[str]]): Types of embeddings to return. Options:
+            'float', 'int8', 'uint8', 'binary', 'ubinary'. Default is None, which omits the field
+            from the request.
+        aws_region (Optional[str]): The AWS region to use.
+        aws_access_key_id (Optional[str]): The AWS access key ID to use.
+        aws_secret_access_key (Optional[str]): The AWS secret access key to use.
+        session (Optional[Session]): A boto3 Session object to use for authentication.
+        request_params (Optional[Dict[str, Any]]): Additional parameters merged into the request body.
+        client_params (Optional[Dict[str, Any]]): Additional parameters to pass to the boto3 client.
+        client (Optional[AwsClient]): An existing client; when set, get_client() returns it unchanged.
+    """
+
+    id: str = "cohere.embed-multilingual-v3"
+    dimensions: int = 1024  # Cohere models have 1024 dimensions by default
+    input_type: str = "search_query"
+    truncate: Optional[str] = None  # 'NONE', 'START', or 'END'
+    # 'float', 'int8', 'uint8', etc.
+    embedding_types: Optional[List[str]] = None
+
+    aws_region: Optional[str] = None
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    session: Optional[Session] = None
+
+    request_params: Optional[Dict[str, Any]] = None
+    client_params: Optional[Dict[str, Any]] = None
+    client: Optional[AwsClient] = None
+
+    def get_client(self) -> AwsClient:
+        """
+        Returns an AWS Bedrock client, creating and caching it on first use.
+
+        Returns:
+            AwsClient: An instance of the AWS Bedrock client.
+
+        Raises:
+            AgnoError: If no session is provided and the access key ID or secret access key is missing.
+        """
+        if self.client is not None:
+            return self.client
+
+        if self.session:
+            # Honour an explicit region and client_params for session-based clients too; a None
+            # region lets boto3 fall back to the session's own region.
+            session_kwargs: Dict[str, Any] = {"region_name": self.aws_region, **(self.client_params or {})}
+            self.client = self.session.client("bedrock-runtime", **session_kwargs)
+            return self.client
+
+        self.aws_access_key_id = self.aws_access_key_id or getenv("AWS_ACCESS_KEY_ID")
+        self.aws_secret_access_key = self.aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY")
+        self.aws_region = self.aws_region or getenv("AWS_REGION")
+
+        if not self.aws_access_key_id or not self.aws_secret_access_key:
+            raise AgnoError(
+                message="AWS credentials not found. Please set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide a boto3 session.",
+                status_code=400,
+            )
+
+        self.client = AwsClient(
+            service_name="bedrock-runtime",
+            region_name=self.aws_region,
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            **(self.client_params or {}),
+        )
+        return self.client
+
+    def _format_request_body(self, text: str) -> str:
+        """
+        Format the request body for the embedder.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            str: The formatted request body as a JSON string.
+        """
+        request_body: Dict[str, Any] = {
+            "texts": [text],
+            "input_type": self.input_type,
+        }
+
+        if self.truncate:
+            request_body["truncate"] = self.truncate
+
+        if self.embedding_types:
+            request_body["embedding_types"] = self.embedding_types
+
+        # Add additional request parameters if provided
+        if self.request_params:
+            request_body.update(self.request_params)
+
+        return json.dumps(request_body)
+
+    def response(self, text: str) -> Dict[str, Any]:
+        """
+        Get embeddings from AWS Bedrock for the given text.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            Dict[str, Any]: The response from the API.
+
+        Raises:
+            ModelProviderError: If the client cannot be created or the request or decoding fails.
+        """
+        try:
+            body = self._format_request_body(text)
+            response = self.get_client().invoke_model(
+                modelId=self.id,
+                body=body,
+                contentType="application/json",
+                accept="application/json",
+            )
+            response_body = json.loads(response["body"].read().decode("utf-8"))
+            return response_body
+        except ClientError as e:
+            log_error(f"Unexpected error calling Bedrock API: {str(e)}")
+            raise ModelProviderError(message=str(e.response), model_name="AwsBedrockEmbedder", model_id=self.id) from e
+        except Exception as e:
+            log_error(f"Unexpected error calling Bedrock API: {str(e)}")
+            raise ModelProviderError(message=str(e), model_name="AwsBedrockEmbedder", model_id=self.id) from e
+
+    @staticmethod
+    def _extract_embedding(response: Dict[str, Any]) -> List[float]:
+        """
+        Pull the first embedding vector out of a Bedrock response body.
+
+        Args:
+            response (Dict[str, Any]): The decoded response body.
+
+        Returns:
+            List[float]: The first embedding vector, or an empty list if none can be found.
+        """
+        embeddings = response.get("embeddings") if isinstance(response, dict) else None
+        if isinstance(embeddings, dict):
+            # embedding_types was used: prefer 'float', else fall back to the first available type
+            embeddings = embeddings["float"] if "float" in embeddings else next(iter(embeddings.values()), None)
+        if isinstance(embeddings, list) and embeddings:
+            return embeddings[0]
+        logger.warning("No embeddings found in response")
+        return []
+
+    def get_embedding(self, text: str) -> List[float]:
+        """
+        Get embeddings for the given text.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            List[float]: The embedding vector, or an empty list if the response contains none.
+        """
+        return self._extract_embedding(self.response(text=text))
+
+    def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
+        """
+        Get embeddings and usage information for the given text.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            Tuple[List[float], Optional[Dict[str, Any]]]: The embedding vector and usage information.
+        """
+        response = self.response(text=text)
+
+        # Share the extraction path with get_embedding so malformed payloads yield [] instead of raising
+        embedding = self._extract_embedding(response)
+
+        # Extract usage metrics if available
+        usage = response.get("usage") if isinstance(response, dict) else None
+
+        return embedding, usage
diff --git a/libs/agno/agno/knowledge/agent.py b/libs/agno/agno/knowledge/agent.py
@@ -29,7 +29,7 @@ class AgentKnowledge(BaseModel):
 
     @model_validator(mode="after")
     def update_reader(self) -> "AgentKnowledge":
-        if self.reader is not None:
+        if self.reader is not None and self.reader.chunking_strategy is None:  # only fill in a strategy when the reader has none of its own
             self.reader.chunking_strategy = self.chunking_strategy
         return self
 
diff --git a/libs/agno/tests/integration/teams/test_team_metrics.py b/libs/agno/tests/integration/teams/test_team_metrics.py
@@ -52,6 +52,7 @@ def test_team_metrics_basic():
     assert team.session_metrics.output_tokens is not None
     assert team.session_metrics.total_tokens is not None
 
+
 def test_team_metrics_streaming():
     """Test team metrics with streaming."""
 
diff --git a/libs/agno/tests/unit/reader/test_firecrawl_reader.py b/libs/agno/tests/unit/reader/test_firecrawl_reader.py
@@ -119,7 +119,7 @@ def test_scrape_with_chunking(mock_scrape_response):
         # Create reader with chunking enabled
         reader = FirecrawlReader()
         reader.chunk = True
-        reader.chunk_size = 10  # Small chunk size to ensure multiple chunks
+        reader.chunking_strategy.chunk_size = 10  # Small chunk size to ensure multiple chunks
 
         # Create a patch for chunk_document
         def mock_chunk_document(doc):
@@ -209,7 +209,7 @@ def test_crawl_with_chunking(mock_crawl_response):
         # Create reader with chunking enabled
         reader = FirecrawlReader(mode="crawl")
         reader.chunk = True
-        reader.chunk_size = 10  # Small chunk size to ensure multiple chunks
+        reader.chunking_strategy.chunk_size = 10  # Small chunk size to ensure multiple chunks
 
         def mock_chunk_document(doc):
             # Simple mock that splits into 2 chunks
diff --git a/libs/agno/tests/unit/reader/test_url_reader.py b/libs/agno/tests/unit/reader/test_url_reader.py
@@ -97,7 +97,7 @@ def test_chunking(mock_response):
     with patch("httpx.get", return_value=mock_response):
         reader = URLReader()
         reader.chunk = True
-        reader.chunk_size = 100
+        reader.chunking_strategy.chunk_size = 100
         documents = reader.read(url)
 
         assert len(documents) > 1
@@ -198,7 +198,7 @@ async def test_async_chunking():
     with patch("httpx.AsyncClient", return_value=mock_client):
         reader = URLReader()
         reader.chunk = True
-        reader.chunk_size = 100
+        reader.chunking_strategy.chunk_size = 100
         documents = await reader.async_read(url)
 
         assert len(documents) > 1