Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions engram-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from engram import EngramClient
import os
import uuid

Check failure on line 3 in engram-test.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

engram-test.py:3:8: F401 `uuid` imported but unused

client = EngramClient(
api_key=os.getenv("ENGRAM_API_KEY"),
base_url="https://dev-engram.labs.weaviate.io"
)

user_id = f"connor-1234567890"

Check failure on line 10 in engram-test.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F541)

engram-test.py:10:11: F541 f-string without any placeholders

'''
run = client.memories.add(
"My favorite mattress brand is Purple",
user_id=user_id,
)

print(f"Run ID: {run.run_id}")
print(f"Status: {run.status}")
'''

results = client.memories.search(
query="What is the user's favorite mattress brand?",
user_id=user_id,
)

for result in results:
print(result)

"""
# Returns:
Memory(id='08505722-6e44-43b5-bb17-2091a58fa834', project_id='019ce79c-d6d6-7dea-8687-edaacade55ee', content="The user's favorite mattress brand is Purple.", topic='UserKnowledge', group='default', created_at='2026-03-19T13:15:46.668Z', updated_at='2026-03-19T13:15:46.668Z', user_id='connor-1234567890', conversation_id=None, tags=None, score=1)
"""
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "query-agent-benchmarking"
version = "0.6"
description="A Python library for benchmarking Weaviate's Query Agent!"
readme="README.md"
requires-python = ">=3.10"
requires-python = ">=3.11"
dependencies = [
"dspy>=3.0.4",
"sentence-transformers>=5.0.0",
Expand All @@ -20,7 +20,8 @@ dependencies = [
"setuptools>=80.9.0",
"wheel>=0.45.1",
"twine>=6.2.0",
"PyMuPDF>=1.24.0",
"PyMuPDF>=1.24.0,!=1.27.2.2",
"weaviate-engram>=0.4.0",
]

[tool.setuptools.packages.find]
Expand Down
4 changes: 3 additions & 1 deletion query_agent_benchmarking/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from .search_agent import SearchAgentBuilder
from .ask_agent import AskAgentBuilder
from .base import BaseAgentBuilder
from .engram_ask_agent import EngramAskAgent

# Public API of the agent subpackage.
# NOTE: "AskAgentBuilder" was previously listed twice; the duplicate is removed.
__all__ = [
    "SearchAgentBuilder",
    "AskAgentBuilder",
    "BaseAgentBuilder",
    "EngramAskAgent",
]

143 changes: 143 additions & 0 deletions query_agent_benchmarking/agent/engram_ask_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from typing import Optional

from engram import EngramClient
import openai

from query_agent_benchmarking.agent.ask_agent import AskResponse


DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions based on the user's memories. "
    "Use the provided memories as context to answer the question. "
    "If the memories don't contain enough information, say so."
)


class EngramAskAgent:
    """
    Ask agent that retrieves memories from Engram and generates answers via an LLM.

    Flow: query -> Engram search (scoped by tenant_id) -> build context -> LLM answer.

    This agent does not extend BaseAgentBuilder since it doesn't need a Weaviate connection.
    It follows the same interface (run/run_async returning AskResponse) so it plugs directly
    into run_ask_queries() / run_ask_queries_async().

    For multi-tenant datasets like LongMemEval, each query's tenant_id is used as the
    Engram user_id, optionally prefixed with engram_user_id_prefix.
    """

    def __init__(
        self,
        engram_api_key: str,
        engram_base_url: str,
        engram_user_id_prefix: str = "",
        llm_model: str = "gpt-4.1",
        system_prompt: Optional[str] = None,
        openai_api_key: Optional[str] = None,
    ):
        """
        Args:
            engram_api_key: API key for the Engram service.
            engram_base_url: Base URL of the Engram deployment.
            engram_user_id_prefix: Optional prefix prepended to each tenant_id
                when mapping it to an Engram user_id.
            llm_model: OpenAI model name used for answer generation.
            system_prompt: Overrides DEFAULT_SYSTEM_PROMPT when provided.
            openai_api_key: Explicit OpenAI key; when omitted, the client falls
                back to its own environment-based configuration.
        """
        self.engram_client = EngramClient(
            api_key=engram_api_key,
            base_url=engram_base_url,
        )
        self.engram_user_id_prefix = engram_user_id_prefix
        self.llm_model = llm_model
        self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT

        # Only pass api_key when explicitly given so the default client can
        # pick up OPENAI_API_KEY from the environment on its own.
        if openai_api_key:
            self.openai_client = openai.OpenAI(api_key=openai_api_key)
        else:
            self.openai_client = openai.OpenAI()

        # Plain string literal — was an f-string with no placeholders (ruff F541).
        print("EngramAskAgent initialized:")
        print(f"  Engram base URL: {engram_base_url}")
        print(f"  User ID prefix: {engram_user_id_prefix!r}")
        print(f"  LLM model: {llm_model}")

    def _resolve_user_id(self, tenant_id: Optional[str]) -> str:
        """Map a query's tenant_id to an Engram user_id.

        Raises:
            ValueError: if tenant_id is missing/empty — memory search must be
                scoped to a user, so an unscoped query is a caller bug.
        """
        if not tenant_id:
            raise ValueError(
                "EngramAskAgent requires tenant_id on each query to scope memory search. "
                "Ensure your dataset loader populates InMemoryAskQuery.tenant_id."
            )
        return f"{self.engram_user_id_prefix}{tenant_id}"

    def _build_context(self, memories) -> str:
        """Build a newline-joined context string from Engram memory search results.

        Each memory is rendered as "Memory <i>: <content>" (1-based index).
        Returns "" when no memories were retrieved.
        """
        return "\n".join(
            f"Memory {i}: {memory.content}" for i, memory in enumerate(memories, 1)
        )

    def _generate_answer(self, question: str, context: str) -> str:
        """Generate an answer from the retrieved memories using an LLM."""
        user_message = (
            f"Based on the following memories, answer the question.\n\n"
            f"Memories:\n{context}\n\n"
            f"Question: {question}"
        )
        response = self.openai_client.chat.completions.create(
            model=self.llm_model,
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": user_message},
            ],
        )
        # The API may return None content; normalize to "" for downstream code.
        return response.choices[0].message.content or ""

    def run(
        self,
        query: str,
        oracle_context_id: Optional[str] = None,
        tenant_id: Optional[str] = None,
    ) -> "AskResponse":
        """
        Run a synchronous ask query via Engram retrieval + LLM generation.

        Args:
            query: The question to answer.
            oracle_context_id: Unused, kept for interface compatibility.
            tenant_id: Maps to Engram user_id for scoping memory search.
        """
        user_id = self._resolve_user_id(tenant_id)
        memories = self.engram_client.memories.search(
            query=query,
            user_id=user_id,
        )
        context = self._build_context(memories)
        answer = self._generate_answer(query, context)

        return AskResponse(
            final_answer=answer,
            raw_response={"memories": [m.content for m in memories], "answer": answer},
        )

    async def run_async(
        self,
        query: str,
        oracle_context_id: Optional[str] = None,
        tenant_id: Optional[str] = None,
    ) -> "AskResponse":
        """
        Run an async ask query. Offloads the blocking sync implementation to a
        worker thread so the event loop is not stalled by network calls.

        Args:
            query: The question to answer.
            oracle_context_id: Unused, kept for interface compatibility.
            tenant_id: Maps to Engram user_id for scoping memory search.
        """
        import asyncio  # local import keeps module import-time dependencies minimal

        return await asyncio.to_thread(self.run, query, oracle_context_id, tenant_id)

    async def initialize_async(self):
        """No-op — Engram client is initialized in __init__."""
        pass

    async def close_async(self):
        """No-op — no persistent connections to close."""
        pass

    def close_sync(self):
        """No-op — no persistent connections to close."""
        pass
22 changes: 17 additions & 5 deletions query_agent_benchmarking/ask_benchmark_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pathlib import Path
from typing import Optional, Any, Union

from query_agent_benchmarking.agent import AskAgentBuilder
from query_agent_benchmarking.agent import AskAgentBuilder, EngramAskAgent
from query_agent_benchmarking.models import (
DocsCollection,
AskQueriesCollection,
Expand Down Expand Up @@ -51,8 +51,11 @@ async def _run_ask_eval(config: dict[str, Any]) -> dict[str, Any]:
dataset_name = config.get("ask_dataset")
docs_collection = config.get("docs_collection")
queries_input = config.get("queries")

# Load queries

# Load queries — fall back to ask_dataset if queries is not explicitly provided
if not queries_input and dataset_name:
queries_input = dataset_name

if queries_input:
if isinstance(queries_input, AskQueriesCollection):
# Custom Weaviate collection
Expand Down Expand Up @@ -88,7 +91,7 @@ async def _run_ask_eval(config: dict[str, Any]) -> dict[str, Any]:
f"Got: {type(queries_input)}"
)
else:
raise ValueError("Must provide 'queries' for ask evaluation")
raise ValueError("Must provide 'queries' or 'ask_dataset' for ask evaluation")

# Determine dataset identifier
if docs_collection:
Expand Down Expand Up @@ -138,7 +141,16 @@ async def _run_ask_eval(config: dict[str, Any]) -> dict[str, Any]:
# Add agent_name to config for result serialization
config["agent_name"] = agent_name

if docs_collection:
if agent_name == "engram":
import os
ask_agent = EngramAskAgent(
engram_api_key=config.get("engram_api_key") or os.getenv("ENGRAM_API_KEY", ""),
engram_base_url=config.get("engram_base_url") or os.getenv("ENGRAM_BASE_URL", ""),
engram_user_id_prefix=config.get("engram_user_id_prefix", ""),
llm_model=config.get("engram_llm_model", "gpt-4.1"),
system_prompt=system_prompt,
)
elif docs_collection:
ask_agent = AskAgentBuilder(
agent_name=agent_name,
docs_collection=docs_collection,
Expand Down
5 changes: 3 additions & 2 deletions query_agent_benchmarking/benchmark-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ search_target: "image_content_weaviate"
# Ask Mode Settings
# =============================================================================
ask_agent_name: "query-agent-ask"
ask_dataset: multihoprag
queries: "multihoprag"
ask_dataset: longmemeval-s

# LLM Judge configuration
judge_model: "openai/gpt-4.1"
ensemble_k: 3

engram_base_url: https://dev-engram.labs.weaviate.io

# =============================================================================
# Shared Settings
# =============================================================================
Expand Down
1 change: 1 addition & 0 deletions query_agent_benchmarking/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

# Ask-mode datasets this benchmark knows how to load.
supported_ask_datasets = (
    "irpapers",       # Uses IRPapers_Default collection
    "longmemeval-s",  # Uses Engram memory layer or LongmemevalS_Default collection
    "multihoprag",    # Uses MultiHopRAG_Default collection
    "officeqa",       # Uses OfficeQA_Default collection (local PDFs)
)
Expand Down
2 changes: 2 additions & 0 deletions query_agent_benchmarking/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
create_collection_with_vector_config,
get_vector_config,
)
from .engram_loader import engram_loader

__all__ = [
"DatasetSpec",
Expand All @@ -16,4 +17,5 @@
"database_loader",
"create_collection_with_vector_config",
"get_vector_config",
"engram_loader",
]
12 changes: 10 additions & 2 deletions query_agent_benchmarking/database/database_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,23 @@ def create_collection_with_vector_config(
# Public API: primary entry point used by `scripts/populate-db.py` and package users.
def database_loader(recreate: bool = True, tag: str = "Default") -> None:
"""
Load dataset from config and populate Weaviate collection.
Load dataset from config and populate Weaviate collection (or Engram if use_engram is set).

Args:
recreate: Whether to drop existing collection before creating
tag: Suffix to add to collection name
"""
config_path = Path(__file__).parent / "database_loader_config.yml"
config = load_config(config_path)

if config.get("use_engram", False):
from .engram_loader import engram_loader
engram_loader(
dataset_name=config.get("dataset_name"),
engram_base_url=config.get("engram_base_url"),
)
return

# Get provider headers for all configured embedding providers.
headers = _resolve_provider_headers(
embedding_providers=config.get("embedding_providers"),
Expand Down
6 changes: 5 additions & 1 deletion query_agent_benchmarking/database/database_loader_config.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dataset_name: irpapers
dataset_name: longmemeval-s

# NOTE: This is a work in progress, need a better abstraction for multiple models from the same provider.
# The problem is X_weaviate_snowflake_... ends up being really verbose when you need to
Expand All @@ -17,3 +17,7 @@ ksim: 4
dprojections: 16
repetitions: 10
ef: 500

# Engram parameters
use_engram: True
engram_base_url: https://dev-engram.labs.weaviate.io
Loading
Loading