Skip to content

Commit ef37add

Browse files
authored
Merge branch 'main' into release/v2.5.4
2 parents e99404d + 15108d1 commit ef37add

File tree

113 files changed

+6687
-1167
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+6687
-1167
lines changed

.github/workflows/test_on_release.yml

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,26 @@ jobs:
848848
./libs/agno/tests/integration/teams/test_metrics.py \
849849
./libs/agno/tests/integration/teams/test_parser_model.py
850850
851+
verify-split-test-coverage:
852+
runs-on: ubuntu-latest
853+
steps:
854+
- uses: actions/checkout@v3
855+
- name: Verify all split integration tests are assigned to CI jobs
856+
run: |
857+
status=0
858+
for dir in teams knowledge; do
859+
grep -oE "\./libs/agno/tests/integration/${dir}/[^ \\\\]+(/[^ \\\\]+)*\.py" \
860+
.github/workflows/test_on_release.yml \
861+
| grep -v '^\s*#' | sort -u > /tmp/wf.txt
862+
find "libs/agno/tests/integration/${dir}" -name 'test_*.py' \
863+
| sed 's|^|./|' | sort > /tmp/repo.txt
864+
if ! diff /tmp/repo.txt /tmp/wf.txt; then
865+
echo "::error::${dir} test coverage mismatch — see diff above"
866+
status=1
867+
fi
868+
done
869+
exit $status
870+
851871
test-workflows:
852872
runs-on: ubuntu-latest
853873
strategy:
@@ -943,7 +963,9 @@ jobs:
943963
./libs/agno/tests/integration/knowledge/test_json_knowledge.py \
944964
./libs/agno/tests/integration/knowledge/test_md_knowledge.py \
945965
./libs/agno/tests/integration/knowledge/test_pdf_knowledge.py \
946-
./libs/agno/tests/integration/knowledge/test_text_knowledge.py
966+
./libs/agno/tests/integration/knowledge/test_pptx_knowledge.py \
967+
./libs/agno/tests/integration/knowledge/test_text_knowledge.py \
968+
./libs/agno/tests/integration/knowledge/test_async_knowledge_retriever.py
947969
948970
test-knowledge-2:
949971
runs-on: ubuntu-latest
@@ -977,7 +999,8 @@ jobs:
977999
python -m pytest ./libs/agno/tests/integration/knowledge/test_arxiv_knowledge.py \
9781000
./libs/agno/tests/integration/knowledge/test_firecrawl_knowledge.py \
9791001
./libs/agno/tests/integration/knowledge/test_website_knowledge.py \
980-
./libs/agno/tests/integration/knowledge/test_youtube_knowledge.py
1002+
./libs/agno/tests/integration/knowledge/test_youtube_knowledge.py \
1003+
./libs/agno/tests/integration/knowledge/filters/test_agentic_filtering.py
9811004
9821005
# Run A2A tests (isolated due to dependency conflicts)
9831006
test-a2a:
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
"""
Combined Metrics
=============================

When an agent uses multiple background features, each model's
calls are tracked under separate detail keys:
- "model" for the agent's own calls
- "reasoning_model" for reasoning manager calls
- "compression_model" for compression manager calls
- "output_model" for output model calls
- "memory_model" for memory manager calls
- "culture_model" for culture manager calls
- "session_summary_model" for session summary calls
- "eval_model" for evaluation hook calls

This example shows all detail keys and session-level metrics.
"""

from typing import List

from agno.agent import Agent
from agno.compression.manager import CompressionManager
from agno.culture.manager import CultureManager
from agno.db.postgres import PostgresDb
from agno.eval.agent_as_judge import AgentAsJudgeEval
from agno.memory.manager import MemoryManager
from agno.models.openai import OpenAIChat
from agno.session.summary import SessionSummaryManager
from agno.tools.yfinance import YFinanceTools
from pydantic import BaseModel, Field
from rich.pretty import pprint


class StockSummary(BaseModel):
    """Schema the output model fills in for the structured response."""

    ticker: str = Field(..., description="Stock ticker symbol")
    summary: str = Field(..., description="Brief summary of the stock")
    key_metrics: List[str] = Field(..., description="Key financial metrics")


def _banner(title: str) -> None:
    """Print *title* framed by 50-character separator rules."""
    print("=" * 50)
    print(title)
    print("=" * 50)


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
db = PostgresDb(db_url="postgresql+psycopg://ai:ai@localhost:5532/ai")

# Post-run judge hook; its model calls surface under the "eval_model" key.
eval_hook = AgentAsJudgeEval(
    name="Quality Check",
    model=OpenAIChat(id="gpt-4o-mini"),
    criteria="Response should be helpful and accurate",
    scoring_strategy="binary",
)

# Every background feature gets its own model instance so that each
# detail key listed in the module docstring shows up in the metrics.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    tools=[YFinanceTools(enable_stock_price=True, enable_company_info=True)],
    reasoning_model=OpenAIChat(id="gpt-4o-mini"),
    reasoning=True,
    compression_manager=CompressionManager(
        model=OpenAIChat(id="gpt-4o-mini"),
        compress_tool_results_limit=1,
    ),
    output_model=OpenAIChat(id="gpt-4o-mini"),
    output_schema=StockSummary,
    structured_outputs=True,
    memory_manager=MemoryManager(model=OpenAIChat(id="gpt-4o-mini"), db=db),
    update_memory_on_run=True,
    culture_manager=CultureManager(model=OpenAIChat(id="gpt-4o-mini"), db=db),
    update_cultural_knowledge=True,
    session_summary_manager=SessionSummaryManager(model=OpenAIChat(id="gpt-4o-mini")),
    enable_session_summaries=True,
    post_hooks=[eval_hook],
    db=db,
    session_id="combined-metrics-demo",
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    response = agent.run(
        "Get the stock price and company info for NVDA and summarize it."
    )

    _banner("RUN METRICS")
    pprint(response.metrics)

    _banner("MODEL DETAILS")
    metrics = response.metrics
    if metrics and metrics.details:
        # One entry list per detail key ("model", "reasoning_model", ...).
        for detail_key, per_model_metrics in metrics.details.items():
            print(f"\n{detail_key}:")
            for entry in per_model_metrics:
                pprint(entry)

    _banner("SESSION METRICS")
    session_metrics = agent.get_session_metrics()
    if session_metrics:
        pprint(session_metrics)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
"""
Culture Manager Metrics
=============================

When an agent uses a CultureManager, the culture model's
calls are tracked under the "culture_model" detail key.
"""

from agno.agent import Agent
from agno.culture.manager import CultureManager
from agno.db.postgres import PostgresDb
from agno.models.openai import OpenAIChat
from rich.pretty import pprint


def _banner(title: str) -> None:
    """Print *title* framed by 50-character separator rules."""
    print("=" * 50)
    print(title)
    print("=" * 50)


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
db = PostgresDb(db_url="postgresql+psycopg://ai:ai@localhost:5532/ai")

# The CultureManager gets its own model; its calls are what this
# example inspects under the "culture_model" detail key.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    culture_manager=CultureManager(model=OpenAIChat(id="gpt-4o-mini"), db=db),
    update_cultural_knowledge=True,
    db=db,
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    response = agent.run(
        "Our team always does code reviews before merging. We pair program on complex features."
    )

    _banner("RUN METRICS")
    pprint(response.metrics)

    _banner("MODEL DETAILS")
    metrics = response.metrics
    if metrics and metrics.details:
        for detail_key, per_model_metrics in metrics.details.items():
            print(f"\n{detail_key}:")
            for entry in per_model_metrics:
                pprint(entry)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
"""
Multi-Model Metrics
=============================

When an agent uses a MemoryManager, each manager's model calls
are tracked under separate detail keys in metrics.details.

This example shows the "model" vs "memory_model" breakdown.
"""

from agno.agent import Agent
from agno.db.postgres import PostgresDb
from agno.memory.manager import MemoryManager
from agno.models.openai import OpenAIChat
from rich.pretty import pprint


def _banner(title: str) -> None:
    """Print *title* framed by 50-character separator rules."""
    print("=" * 50)
    print(title)
    print("=" * 50)


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
db = PostgresDb(db_url="postgresql+psycopg://ai:ai@localhost:5532/ai")

# Separate model for the MemoryManager so its usage is reported
# under "memory_model" rather than folded into the agent's "model" key.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    memory_manager=MemoryManager(model=OpenAIChat(id="gpt-4o-mini"), db=db),
    update_memory_on_run=True,
    db=db,
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    response = agent.run(
        "My name is Alice and I work at Google as a senior engineer."
    )

    _banner("RUN METRICS")
    pprint(response.metrics)

    _banner("MODEL DETAILS")
    metrics = response.metrics
    if metrics and metrics.details:
        for detail_key, per_model_metrics in metrics.details.items():
            print(f"\n{detail_key}:")
            for entry in per_model_metrics:
                pprint(entry)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
"""
Demonstrates session-level metrics that accumulate across multiple runs.
"""

from agno.agent import Agent
from agno.db.postgres import PostgresDb
from agno.models.openai import OpenAIChat
from rich.pretty import pprint


def _banner(title: str) -> None:
    """Print *title* framed by 50-character separator rules."""
    print("=" * 50)
    print(title)
    print("=" * 50)


# ---------------------------------------------------------------------------
# Setup
# ---------------------------------------------------------------------------
db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
db = PostgresDb(db_url=db_url, session_table="agent_metrics_sessions")

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# A fixed session_id plus history keeps both runs in the same session,
# so the session metrics below aggregate across them.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    db=db,
    session_id="session_metrics_demo",
    add_history_to_context=True,
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # First run
    first = agent.run("What is the capital of France?")
    _banner("RUN 1 METRICS")
    pprint(first.metrics)

    # Second run on the same session
    second = agent.run("What about Germany?")
    _banner("RUN 2 METRICS")
    pprint(second.metrics)

    # Session metrics aggregate both runs
    _banner("SESSION METRICS (accumulated)")
    session_metrics = agent.get_session_metrics()
    pprint(session_metrics)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
"""
Session Summary Metrics
=============================

When an agent uses a SessionSummaryManager, the summary model's token
usage is tracked separately under the "session_summary_model" detail key.

This lets you see how many tokens are spent summarizing the session
versus the agent's own model calls.

The session summary runs after each interaction to maintain a concise
summary of the conversation so far.
"""

from agno.agent import Agent
from agno.db.postgres import PostgresDb
from agno.models.openai import OpenAIChat
from agno.session.summary import SessionSummaryManager
from rich.pretty import pprint


def _banner(title: str) -> None:
    """Print *title* framed by 50-character separator rules."""
    print("=" * 50)
    print(title)
    print("=" * 50)


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
db = PostgresDb(db_url="postgresql+psycopg://ai:ai@localhost:5532/ai")

# Dedicated summary model: its usage is reported under the
# "session_summary_model" detail key, separate from the agent's own calls.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    session_summary_manager=SessionSummaryManager(
        model=OpenAIChat(id="gpt-4o-mini"),
    ),
    enable_session_summaries=True,
    db=db,
    session_id="session-summary-metrics-demo",
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # First run
    first = agent.run("My name is Alice and I work at Google.")
    _banner("RUN 1 METRICS")
    pprint(first.metrics)

    # Second run - triggers session summary
    second = agent.run("I also enjoy hiking on weekends.")
    _banner("RUN 2 METRICS")
    pprint(second.metrics)

    _banner("MODEL DETAILS (Run 2)")
    metrics = second.metrics
    if metrics and metrics.details:
        for detail_key, per_model_metrics in metrics.details.items():
            print(f"\n{detail_key}:")
            for entry in per_model_metrics:
                pprint(entry)

    _banner("SESSION METRICS (accumulated)")
    session_metrics = agent.get_session_metrics()
    if session_metrics:
        pprint(session_metrics)

0 commit comments

Comments
 (0)