Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions integrations/isaacus/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Changelog

## 0.1.0
- Add `IsaacusTextEmbedder` and `IsaacusDocumentEmbedder` components (default model: `kanon-2-embedder`).
10 changes: 10 additions & 0 deletions integrations/isaacus/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# isaacus-haystack

- [Integration page](https://haystack.deepset.ai/integrations/isaacus)
- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/isaacus/CHANGELOG.md)

---

## Contributing

Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
43 changes: 43 additions & 0 deletions integrations/isaacus/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Packaging metadata for the Isaacus embedders integration.
[project]
name = "isaacus-haystack"
version = "0.1.0"
description = "Kanon 2 (Isaacus) embedders for Haystack"
readme = "README.md"
requires-python = ">=3.9"
license = {text = "Apache-2.0"}
authors = [{name = "Isaacus"}]
dependencies = [
  "haystack-ai>=2.14.0",
  "requests>=2.31.0",
]

[project.urls]
Homepage = "https://haystack.deepset.ai/integrations"
Documentation = "https://docs.isaacus.com/capabilities/embedding"

[build-system]
requires = ["hatchling>=1.21.0"]
build-backend = "hatchling.build"

# Ship the whole namespace package tree that lives under src/.
[tool.hatch.build.targets.wheel]
packages = ["src/haystack_integrations"]

[tool.pytest.ini_options]
addopts = "-q"

[tool.ruff]
line-length = 120

# Rule selection belongs to the `lint` section; the top-level `select` key is deprecated.
[tool.ruff.lint]
select = ["E", "F", "I", "UP", "B", "PL"]

[tool.mypy]
ignore_missing_imports = true

[tool.hatch.envs.test]
dependencies = [
  "pytest",
  "haystack-ai>=2.14.0",
  "requests>=2.31.0",
]

[tool.hatch.envs.test.scripts]
all = "pytest -q"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Public API of the Isaacus embedders package.
from .document_embedder import IsaacusDocumentEmbedder
from .text_embedder import IsaacusTextEmbedder

__all__ = [
    "IsaacusTextEmbedder",
    "IsaacusDocumentEmbedder",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from __future__ import annotations
from typing import List, Optional
from haystack import component
from haystack.dataclasses import Document
from haystack.utils import Secret
from .utils import IsaacusClient


@component
class IsaacusDocumentEmbedder:
    """
    Embeds a list of Haystack `Document`s using Isaacus (configurable model).

    Vectors are written to `document.embedding` on the documents passed in, and the
    same list is returned under the key `documents`. Documents whose content is empty
    or whitespace-only are not sent to the API and keep whatever embedding they had.

    Parameters
    ----------
    api_key : Secret
        Isaacus API key (default reads ISAACUS_API_KEY env var).
    base_url : str
        Isaacus API base URL.
    model : str
        Embedding model name (e.g., "kanon-2-embedder").
    task : str
        Embedding task name ("retrieval/document" by default for documents).
    dimensions : Optional[int]
        Optional output dimensionality.
    overflow_strategy : Optional[str]
        Truncation strategy for long inputs (e.g., "drop_end").
    batch_size : int
        Number of documents sent per request; clamped to the range 1..128.
    timeout : int
        HTTP timeout in seconds.
    """

    def __init__(
        self,
        *,
        api_key: Secret = Secret.from_env_var("ISAACUS_API_KEY"),
        base_url: str = "https://api.isaacus.com/v1",
        model: str = "kanon-2-embedder",
        task: str = "retrieval/document",
        dimensions: Optional[int] = None,
        overflow_strategy: Optional[str] = "drop_end",
        batch_size: int = 128,
        timeout: int = 30,
    ):
        # Every init parameter is kept as an attribute so `to_dict` can round-trip the component.
        self.api_key = api_key
        self.base_url = base_url
        self.timeout = timeout
        self.model = model
        self.task = task
        self.dimensions = dimensions
        self.overflow_strategy = overflow_strategy
        # Clamp instead of raising so out-of-range values degrade gracefully.
        self.batch_size = max(1, min(128, batch_size))
        self._client = IsaacusClient(api_key.resolve_value(), base_url, timeout)

    def to_dict(self) -> dict:
        """
        Serialize the component to a dictionary.

        The API key is stored through `Secret.to_dict()`, never as a plain string.
        """
        from haystack import default_to_dict

        return default_to_dict(
            self,
            api_key=self.api_key.to_dict(),
            base_url=self.base_url,
            model=self.model,
            task=self.task,
            dimensions=self.dimensions,
            overflow_strategy=self.overflow_strategy,
            batch_size=self.batch_size,
            timeout=self.timeout,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "IsaacusDocumentEmbedder":
        """Rebuild the component from a dictionary produced by `to_dict`."""
        from haystack import default_from_dict
        from haystack.utils import deserialize_secrets_inplace

        deserialize_secrets_inplace(data.get("init_parameters", {}), keys=["api_key"])
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(self, documents: List[Document]):
        """
        Embed the given documents in batches of `batch_size`.

        Raises
        ------
        TypeError
            If `documents` is not a list of `Document` objects.
        ValueError
            If the API returns a different number of vectors than documents sent.
        """
        if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
            msg = (
                "IsaacusDocumentEmbedder expects a list of Documents as input. "
                "In case you want to embed a string, please use the IsaacusTextEmbedder."
            )
            raise TypeError(msg)

        if not documents:
            return {"documents": []}

        # Only embed non-empty docs
        to_embed = [doc for doc in documents if (doc.content or "").strip()]

        for start in range(0, len(to_embed), self.batch_size):
            batch = to_embed[start : start + self.batch_size]
            vectors = self._client.embeddings_create(
                model=self.model,
                texts=[doc.content for doc in batch],
                task=self.task,
                dimensions=self.dimensions,
                overflow_strategy=self.overflow_strategy,
            )
            # `zip` would silently drop documents on a short response; fail loudly instead.
            if len(vectors) != len(batch):
                msg = f"Isaacus API returned {len(vectors)} embeddings for {len(batch)} documents."
                raise ValueError(msg)
            for doc, vector in zip(batch, vectors):
                doc.embedding = vector

        return {"documents": documents}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations
from typing import List, Optional
from haystack import component
from haystack.utils import Secret
from .utils import IsaacusClient


@component
class IsaacusTextEmbedder:
    """
    Embeds a text string into a vector using Isaacus (configurable model).
    Returns a single vector under the key `embedding`.

    Parameters
    ----------
    api_key : Secret
        Isaacus API key (default reads ISAACUS_API_KEY env var).
    base_url : str
        Isaacus API base URL.
    model : str
        Embedding model name (e.g., "kanon-2-embedder").
    task : str
        Embedding task name ("retrieval/query" by default for queries).
    dimensions : Optional[int]
        Optional output dimensionality (e.g., 1792, 1024, 768...).
    overflow_strategy : Optional[str]
        Truncation strategy for long inputs (e.g., "drop_end").
    timeout : int
        HTTP timeout in seconds.
    """

    def __init__(
        self,
        *,
        api_key: Secret = Secret.from_env_var("ISAACUS_API_KEY"),
        base_url: str = "https://api.isaacus.com/v1",
        model: str = "kanon-2-embedder",
        task: str = "retrieval/query",
        dimensions: Optional[int] = None,
        overflow_strategy: Optional[str] = "drop_end",
        timeout: int = 30,
    ):
        # Every init parameter is kept as an attribute so `to_dict` can round-trip the component.
        self.api_key = api_key
        self.base_url = base_url
        self.timeout = timeout
        self.model = model
        self.task = task
        self.dimensions = dimensions
        self.overflow_strategy = overflow_strategy
        self._client = IsaacusClient(api_key.resolve_value(), base_url, timeout)

    def to_dict(self) -> dict:
        """
        Serialize the component to a dictionary.

        The API key is stored through `Secret.to_dict()`, never as a plain string.
        """
        from haystack import default_to_dict

        return default_to_dict(
            self,
            api_key=self.api_key.to_dict(),
            base_url=self.base_url,
            model=self.model,
            task=self.task,
            dimensions=self.dimensions,
            overflow_strategy=self.overflow_strategy,
            timeout=self.timeout,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "IsaacusTextEmbedder":
        """Rebuild the component from a dictionary produced by `to_dict`."""
        from haystack import default_from_dict
        from haystack.utils import deserialize_secrets_inplace

        deserialize_secrets_inplace(data.get("init_parameters", {}), keys=["api_key"])
        return default_from_dict(cls, data)

    @component.output_types(embedding=List[float])
    def run(self, text: str):
        """
        Embed a single string.

        Blank or whitespace-only text is not sent to the API and yields an empty vector.

        Raises
        ------
        TypeError
            If `text` is not a string.
        ValueError
            If the API response contains no embedding for the text.
        """
        if not isinstance(text, str):
            msg = (
                "IsaacusTextEmbedder expects a string as input. "
                "In case you want to embed a list of Documents, please use the IsaacusDocumentEmbedder."
            )
            raise TypeError(msg)

        if not text.strip():
            return {"embedding": []}

        vectors = self._client.embeddings_create(
            model=self.model,
            texts=[text],
            task=self.task,
            dimensions=self.dimensions,
            overflow_strategy=self.overflow_strategy,
        )
        # Turn an empty response into a clear error instead of a bare IndexError.
        if not vectors:
            msg = "Isaacus API returned no embedding for the given text."
            raise ValueError(msg)
        return {"embedding": vectors[0]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
import requests


class IsaacusClient:
    """
    Minimal HTTP client for the Isaacus embeddings endpoint.

    Parameters
    ----------
    api_key : str
        API key sent as a bearer token in the `Authorization` header.
    base_url : str
        API base URL; trailing slashes are removed.
    timeout : int
        Timeout in seconds passed to `requests.post`.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.isaacus.com/v1", timeout: int = 30):
        self.api_key = api_key
        # Strip trailing slashes so appending "/embeddings" never produces a double slash.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def embeddings_create(
        self,
        *,
        model: str,
        texts: List[str],
        task: Optional[str] = None,
        dimensions: Optional[int] = None,
        overflow_strategy: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
    ) -> List[List[float]]:
        """
        Request embeddings for `texts` and return one vector per text, in input order.

        Optional fields (`task`, `dimensions`, `overflow_strategy`) are only included
        in the request body when they are set. `extra_headers` are merged over the
        default headers.

        Raises
        ------
        requests.HTTPError
            If the API answers with an error status.
        ValueError
            If the number of returned embeddings differs from the number of texts.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return []

        url = f"{self.base_url}/embeddings"
        payload: Dict[str, Any] = {"model": model, "texts": texts}
        if task:
            payload["task"] = task
        if dimensions is not None:
            payload["dimensions"] = int(dimensions)
        if overflow_strategy:
            payload["overflow_strategy"] = overflow_strategy

        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        if extra_headers:
            headers.update(extra_headers)

        resp = requests.post(url, json=payload, headers=headers, timeout=self.timeout)
        resp.raise_for_status()
        items = resp.json().get("embeddings", [])

        # When each item reports its input position, use it rather than trusting array order.
        if all(isinstance(item.get("index"), int) for item in items):
            items = sorted(items, key=lambda item: item["index"])

        # Callers pair vectors with inputs positionally, so a count mismatch must not pass silently.
        if len(items) != len(texts):
            msg = f"Isaacus API returned {len(items)} embeddings for {len(texts)} input texts."
            raise ValueError(msg)

        return [item["embedding"] for item in items]
37 changes: 37 additions & 0 deletions integrations/isaacus/tests/test_kanon2_embedder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations
from unittest.mock import patch
from haystack.dataclasses import Document
from haystack.utils import Secret
from haystack_integrations.components.embedders.isaacus import (
IsaacusTextEmbedder,
IsaacusDocumentEmbedder,
)


def _fake_post(*_args, **kwargs):
    """
    Stand-in for `requests.post` used by the tests below.

    The returned object answers `json()` with one 4-dimensional vector per input
    text, each component being the text's length as a float.
    """
    request_body = kwargs.get("json", {})

    class _Resp:
        def raise_for_status(self):
            # Always succeeds: the fake never reports an HTTP error.
            return None

        def json(self):
            embeddings = []
            for text in request_body.get("texts", []):
                embeddings.append({"embedding": [float(len(text))] * 4})
            return {"embeddings": embeddings}

    return _Resp()


def test_text_embedder_runs_and_returns_vector():
    """The text embedder returns a list-valued `embedding` of the size produced by the fake API."""
    with patch("requests.post", _fake_post):
        embedder = IsaacusTextEmbedder(api_key=Secret.from_token("x"), model="kanon-2-embedder")
        result = embedder.run("hello")

    assert "embedding" in result
    vector = result["embedding"]
    assert isinstance(vector, list)
    assert len(vector) == 4


def test_document_embedder_sets_embeddings_on_documents():
    """Non-empty documents receive a vector; the empty document is left without one."""
    inputs = [Document(content=text) for text in ("a", "bb", "")]

    with patch("requests.post", _fake_post):
        embedder = IsaacusDocumentEmbedder(
            api_key=Secret.from_token("x"),
            batch_size=2,
            model="kanon-2-embedder",
        )
        embedded = embedder.run(inputs)["documents"]

    for doc in embedded[:2]:
        assert isinstance(doc.embedding, list)
        assert len(doc.embedding) == 4
    # empty doc keeps embedding as None/falsy
    assert not embedded[2].embedding