Skip to content

Commit 407e27a

Browse files
committed
feat: add the knowledge assistant example
1 parent e8fa0fd commit 407e27a

33 files changed

+9754
-14
lines changed

.env.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
SESSION_STORAGE_TYPE=memory
22
# SESSION_STORAGE_TYPE=db
33

4+
# example for sqlite
45
ADK_DATABASE_URL=sqlite:///./chatkit.db
56

7+
# example for postgres
8+
# ADK_DATABASE_URL=postgresql://POSTGRES_USERNAME:POSTGRES_PASSWORD@POSTGRES_HOST:POSTGRES_PORT/DB_NAME
9+
610
gpt41_agent={"llm":{"model_name": "azure/gpt-4.1", "provider_args" : {"api_key": "xx", "api_base": "", "api_version": "2025-03-01-preview"}}}
711
gpt41_mini_agent={"llm":{"model_name": "azure/gpt-4.1-mini", "provider_args" : {"api_key": "xx", "api_base": "", "api_version": "2025-03-01-preview"}}}
812

913

14+
DATA_DIR=/workspaces/adk-chatkit/examples/data

examples/backend/pyproject.toml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,17 @@ dependencies = [
1010
"fastapi-cognito>=2.9.0",
1111
"pydantic>=2.11.7",
1212
"pydantic-settings>=2.10.1",
13-
"google-adk>=1.16.0",
13+
"google-adk>=1.16.0",
1414
"litellm>=1.76.3",
1515
"adk-chatkit",
16+
"langchain-community>=0.4",
17+
"langchain-core>=1.0.0",
18+
"lancedb>=0.25.2",
19+
"langchain-openai>=1.0.1",
20+
"langchain-ollama>=1.0.0",
21+
"pypdf>=6.1.3",
22+
"beautifulsoup4>=4.14.2",
23+
"lxml>=6.0.2",
1624
]
1725

1826
[tool.ruff]

examples/backend/src/backend/_app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import functools
2-
import logging
32
from contextlib import asynccontextmanager
43
from typing import AsyncContextManager, AsyncGenerator, Callable, Self
54

@@ -12,10 +11,9 @@
1211
from ._runner_manager import RunnerManager
1312
from .api.facts import router as facts_router
1413
from .api.health import router as health_router
14+
from .api.knowledge import router as knowledge_router
1515
from .api.support import router as support_router
1616

17-
_LOGGER = logging.getLogger(__name__)
18-
1917

2018
@asynccontextmanager
2119
async def internal_lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
@@ -56,3 +54,4 @@ def __init__(
5654
self.include_router(health_router, tags=["healthcheck"])
5755
self.include_router(support_router, prefix="/support", tags=["support"])
5856
self.include_router(facts_router, prefix="/facts", tags=["facts"])
57+
self.include_router(knowledge_router, prefix="/knowledge", tags=["knowledge"])

examples/backend/src/backend/_config.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,37 @@
11
import os
22
from enum import Enum
3+
from pathlib import Path
34
from typing import Annotated, Any, Literal
45

5-
from pydantic import AnyUrl, BeforeValidator, computed_field
6+
from pydantic import AnyUrl, BaseModel, BeforeValidator, SecretStr, computed_field
67
from pydantic_settings import BaseSettings, SettingsConfigDict
78

89
from .agents._config import AgentConfig
910

1011

12+
class EmbeddingModelType(str, Enum):
    """Embedding providers selectable through ``EmbedderSettings.provider_type``.

    Members subclass ``str`` so that a plain string value (for example one
    read from configuration) compares equal to the matching member.
    """

    openai = "openai"
    azure_openai = "azure_openai"
    ollama = "ollama"
16+
17+
1118
class SessionStorageType(str, Enum):
    """Where chat sessions are kept: in process memory or in a database."""

    memory = "memory"
    db = "db"
1421

1522

23+
class EmbedderSettings(BaseModel):
    """Configuration of the embedding model plus document chunking parameters.

    Only ``provider_type`` and ``model_name`` are required; every connection
    field is optional and defaults to ``None``.
    """

    provider_type: EmbeddingModelType
    model_name: str
    # Declared as SecretStr rather than str so the raw key is wrapped.
    api_key: SecretStr | None = None
    api_endpoint: str | None = None
    api_version: str | None = None
    api_deployment: str | None = None

    # NOTE(review): units are presumably characters -- confirm against the
    # text splitter that consumes these values.
    chunk_size: int = 1200
    chunk_overlap: int = 100
33+
34+
1635
def parse_cors(v: Any) -> list[str] | str:
1736
if isinstance(v, str) and not v.startswith("["):
1837
return [i.strip() for i in v.split(",")]
@@ -56,7 +75,12 @@ def all_cors_origins(self) -> list[str]:
5675

5776
AIRLINE_APP_NAME: str = "airline"
5877
FACTS_APP_NAME: str = "facts"
78+
KNOWLEDGE_APP_NAME: str = "knowledge"
79+
80+
DATA_DIR: Path
5981

6082
SESSION_STORAGE_TYPE: SessionStorageType = SessionStorageType.memory
6183

6284
ADK_DATABASE_URL: str | None = None
85+
86+
embedder: EmbedderSettings | None = None

examples/backend/src/backend/_dishka_providers.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
from google.adk.sessions.base_session_service import BaseSessionService
55
from google.adk.sessions.database_session_service import DatabaseSessionService
66
from google.adk.sessions.in_memory_session_service import InMemorySessionService
7+
from langchain_core.vectorstores import VectorStore
78

89
from ._config import SessionStorageType, Settings
910
from ._runner_manager import RunnerManager
1011
from .agents.airline import AirlineSupportChatkitServer
1112
from .agents.facts import FactsChatkitServer
13+
from .agents.knowledge import KnowledgeAssistantChatkitServer, make_vector_store
1214

1315

1416
class SessionServiceProvider(Provider):
@@ -24,6 +26,18 @@ async def get_service(self, settings: Settings) -> BaseSessionService:
2426
return InMemorySessionService() # type: ignore
2527

2628

29+
class VectorStoreProvider(Provider):
    """Dishka provider that supplies an application-scoped ``VectorStore``."""

    scope = Scope.APP

    # Settings are not built here; they are taken from the container context.
    settings = from_context(provides=Settings, scope=Scope.APP)

    @provide
    async def get_vector_store(self, settings: Settings) -> VectorStore:
        """Create the vector store described by ``settings``.

        Raises:
            ValueError: if ``settings.embedder`` is ``None``.
        """
        if settings.embedder is None:
            raise ValueError("Embedder settings must be provided to create a vector store.")
        return make_vector_store(settings)
39+
40+
2741
def get_providers() -> list[BaseProvider]:
2842
runner_provider = Provider(scope=Scope.APP)
2943
runner_provider.from_context(Settings)
@@ -40,10 +54,16 @@ def get_providers() -> list[BaseProvider]:
4054
facts_server_provider.from_context(Settings)
4155
facts_server_provider.provide(FactsChatkitServer)
4256

57+
knowledge_server_provider = Provider(scope=Scope.APP)
58+
knowledge_server_provider.from_context(Settings)
59+
knowledge_server_provider.provide(KnowledgeAssistantChatkitServer)
60+
4361
return [
4462
runner_provider,
4563
SessionServiceProvider(),
64+
VectorStoreProvider(),
4665
adk_store_provider,
4766
airline_support_server_provider,
4867
facts_server_provider,
68+
knowledge_server_provider,
4969
]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from ._agent import KnowledgeAgent
2+
from ._documents import DOCUMENTS, DOCUMENTS_BY_FILENAME, DOCUMENTS_BY_ID, DOCUMENTS_BY_STEM, as_dicts
3+
from ._server import KnowledgeAssistantChatkitServer
4+
from ._vector_store import make_vector_store
5+
6+
# Public names re-exported by this package.
__all__ = [
    "KnowledgeAgent",
    "KnowledgeAssistantChatkitServer",
    "DOCUMENTS",
    "DOCUMENTS_BY_ID",
    "DOCUMENTS_BY_FILENAME",
    "DOCUMENTS_BY_STEM",
    "as_dicts",
    "make_vector_store",
]
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from typing import Any
2+
3+
from adk_chatkit import remove_widgets_and_client_tool_calls
4+
from google.adk.agents.callback_context import CallbackContext
5+
from google.adk.agents.llm_agent import LlmAgent, ToolUnion
6+
from google.adk.models.lite_llm import LiteLlm
7+
from google.adk.models.llm_request import LlmRequest
8+
from google.adk.models.llm_response import LlmResponse
9+
from google.adk.tools.base_tool import BaseTool
10+
from google.adk.tools.tool_context import ToolContext
11+
from google.genai import types as genai_types
12+
13+
# System prompt for the knowledge assistant agent.
# NOTE(review): the prompt asks for "2-4 sentences" under "Your task" but ends
# with a "2-3 sentences" limit -- confirm which bound is intended.
_INSTRUCTIONS = """You are a **Federal Reserve Knowledge Assistant agent**.

**Source library**
You must use the following documents (refer to them by these exact filenames):
- `01_fomc_statement_2025-09-17.html`
- `02_implementation_note_2025-09-17.html`
- `03_sep_tables_2025-09-17.pdf`
- `04_sep_tables_2025-09-17.html`
- `05_press_conference_transcript_2025-09-17.pdf`
- `06_bls_cpi_2025-08.html`
- `07_bea_gdp_q2_2025_second_estimate.pdf`
- `08_fed_mpr_2025-06.pdf`

These files contain the definitive information about the September 2025 FOMC meeting, projections, and related economic indicators.

**Your task**
- Always call the `file_search` tool before responding. Use the passages it returns as your evidence.
- Compose a concise answer (2-4 sentences) grounded **only** in the retrieved passages.
- Every factual sentence must include a citation in the format `(filename, page/section)` using the filenames listed above. If you cannot provide such a citation, say "I don't see that in the knowledge base." instead of guessing.
- After the answer, optionally list key supporting bullets—each bullet needs its own citation.
- Finish with a `Sources:` section listing each supporting document on its own line: `- filename (page/section)`. Use the exact filenames shown above so the client can highlight the source documents. Do not omit this section even if there is only one source.

**Interaction guardrails**
1. Ask for clarification when the question is ambiguous.
2. Explain when the knowledge base does not contain the requested information.
3. Never rely on external knowledge or unstated assumptions.

Limit the entire response with citation to 2-3 sentences.

""".strip()
43+
44+
45+
class KnowledgeAgent(LlmAgent):
    """LLM agent that answers questions from the Federal Reserve document set.

    The agent is configured with the module-level ``_INSTRUCTIONS`` prompt and
    runs ``remove_widgets_and_client_tool_calls`` before every model call.
    """

    def __init__(
        self,
        llm: LiteLlm,
        tools: list[ToolUnion],
        generate_content_config: genai_types.GenerateContentConfig | None = None,
    ) -> None:
        """Create the agent.

        Args:
            llm: Model wrapper passed to the base agent as ``model``.
            tools: Tools made available to the agent.
            generate_content_config: Optional generation settings forwarded
                unchanged to the base agent.
        """
        # NOTE(review): this is assigned before the base initialiser runs;
        # confirm the attribute is still present afterwards if anything
        # relies on reading ``_llm`` later.
        self._llm = llm

        super().__init__(
            name="knowledge_assistant",
            description="Federal Reserve Knowledge Assistant",
            model=self._llm,
            instruction=_INSTRUCTIONS,
            tools=tools,
            before_model_callback=[remove_widgets_and_client_tool_calls],
            generate_content_config=generate_content_config,
        )
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import asdict, dataclass
4+
from pathlib import Path
5+
from typing import Iterable
6+
7+
8+
def _normalise(value: str) -> str:
    """Return ``value`` with surrounding whitespace removed and lower-cased."""
    return value.strip().lower()
10+
11+
12+
def _slugify(value: str) -> str:
    """Return ``value`` lower-cased with every non-alphanumeric character dropped."""
    return "".join(ch for ch in value.lower() if ch.isalnum())
14+
15+
16+
@dataclass(frozen=True, slots=True)
17+
class DocumentMetadata:
18+
id: str
19+
filename: str
20+
title: str
21+
description: str | None = None
22+
23+
@property
24+
def stem(self) -> str:
25+
return Path(self.filename).stem
26+
27+
28+
# Fixed catalogue of the documents in the knowledge library, in filename order.
DOCUMENTS: tuple[DocumentMetadata, ...] = (
    DocumentMetadata(
        id="fomc_statement",
        filename="01_fomc_statement_2025-09-17.html",
        title="FOMC Statement — September 17, 2025",
        description="Official statement outlining the Federal Reserve's policy decision and rationale.",
    ),
    DocumentMetadata(
        id="implementation_note",
        filename="02_implementation_note_2025-09-17.html",
        title="Implementation Note — September 17, 2025",
        description="Operational guidance on how the policy decision will be implemented across facilities.",
    ),
    DocumentMetadata(
        id="sep_tables_pdf",
        filename="03_sep_tables_2025-09-17.pdf",
        title="Summary of Economic Projections Tables (PDF)",
        description="PDF tables summarising participants' projections for key economic indicators.",
    ),
    DocumentMetadata(
        id="sep_tables_html",
        filename="04_sep_tables_2025-09-17.html",
        title="Summary of Economic Projections Tables (HTML)",
        description="HTML tables summarising participants' projections for key economic indicators.",
    ),
    DocumentMetadata(
        id="press_conference_transcript",
        filename="05_press_conference_transcript_2025-09-17.pdf",
        title="Press Conference Transcript — September 17, 2025",
        description="Chair Powell's press conference transcript following the September 2025 FOMC meeting.",
    ),
    DocumentMetadata(
        id="bls_cpi_august",
        filename="06_bls_cpi_2025-08.html",
        title="BLS Consumer Price Index — August 2025",
        description="Consumer Price Index report providing the latest inflation readings.",
    ),
    DocumentMetadata(
        id="bea_gdp_q2_second_estimate",
        filename="07_bea_gdp_q2_2025_second_estimate.pdf",
        title="BEA GDP Second Estimate — Q2 2025",
        description="Bureau of Economic Analysis second estimate of GDP for the second quarter of 2025.",
    ),
    DocumentMetadata(
        id="monetary_policy_report",
        filename="08_fed_mpr_2025-06.pdf",
        title="Monetary Policy Report — June 2025",
        description="Semiannual Monetary Policy Report submitted to Congress in June 2025.",
    ),
)
78+
# Lookup tables over DOCUMENTS. Filename and stem keys are normalised
# (stripped, lower-cased); ids are used as written.
DOCUMENTS_BY_ID: dict[str, DocumentMetadata] = {doc.id: doc for doc in DOCUMENTS}

DOCUMENTS_BY_FILENAME: dict[str, DocumentMetadata] = {_normalise(doc.filename): doc for doc in DOCUMENTS}

DOCUMENTS_BY_STEM: dict[str, DocumentMetadata] = {_normalise(doc.stem): doc for doc in DOCUMENTS}


def _build_slug_index(documents: Iterable[DocumentMetadata]) -> dict[str, DocumentMetadata]:
    """Map the slug of each document's id, filename, stem, title and description to it.

    The first document to claim a slug keeps it. Building the index inside a
    function keeps loop variables out of the module namespace, and iterating
    a tuple (not a set) makes the key order reproducible between runs.
    """
    index: dict[str, DocumentMetadata] = {}
    for doc in documents:
        for candidate in (doc.id, doc.filename, doc.stem, doc.title, doc.description or ""):
            slug = _slugify(candidate)
            # Test the slug, not the raw text: a value made only of
            # punctuation would otherwise register under the empty key.
            if slug:
                index.setdefault(slug, doc)
    return index


DOCUMENTS_BY_SLUG: dict[str, DocumentMetadata] = _build_slug_index(DOCUMENTS)
95+
96+
97+
def as_dicts(documents: Iterable[DocumentMetadata]) -> list[dict[str, str | None]]:
    """Convert each document to a plain dict of its dataclass fields.

    Only declared fields are included; computed properties are not.
    """
    return [asdict(document) for document in documents]

0 commit comments

Comments
 (0)