57 changes: 57 additions & 0 deletions .github/workflows/e2e-examples-test.yml
@@ -0,0 +1,57 @@
name: e2e-examples-test

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

on:
workflow_dispatch: { }
pull_request:
types: [labeled, synchronize]
push:
branches:
- main
paths:
- 'apps/agentstack-server/**'
- 'apps/agentstack-sdk-py/**'
- 'examples/**'
- 'helm/**'
jobs:
e2e-examples-test:
if: >-
github.event_name != 'pull_request' ||
contains(github.event.pull_request.labels.*.name, 'e2e-examples')
timeout-minutes: 25
runs-on: ubuntu-latest
env:
AGENTSTACK__HOME: ${{ github.workspace }}/.agentstack
steps:
- uses: actions/checkout@v4
- name: Maximize build space
uses: ./.github/actions/maximize-build-space
with:
root-reserve-mb: 15360
temp-reserve-mb: 2048
swap-size-mb: 1024
remove-dotnet: 'true'
- name: "Set up Lima"
uses: lima-vm/lima-actions/setup@v1
id: lima-actions-setup
- name: "Cache ~/.cache/lima"
uses: actions/cache@v4
with:
path: ~/.cache/lima
key: lima-${{ steps.lima-actions-setup.outputs.version }}
- uses: actions/checkout@v4
- uses: ./.github/actions/setup
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: mise run agentstack-server:test:e2e-examples
env:
LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
- run: uv cache prune --ci
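
As configured, the job runs on pushes to main that touch the listed paths, on manual workflow_dispatch, and on pull requests only when the PR carries the e2e-examples label (the labeled and synchronize triggers re-evaluate the if condition). One way to opt a PR in is the GitHub CLI, e.g. gh pr edit <number> --add-label e2e-examples.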
85 changes: 84 additions & 1 deletion apps/agentstack-server/tasks.toml
@@ -319,7 +319,90 @@ echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e
uv run pytest -m e2e --ignore=tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
echo "Tests failed. Checking pod status..."
echo "------------- pods --------------"
kubectl get pod
echo "------------ events -------------"
kubectl get event
fi

if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack-cli:run -- platform delete --vm-name=${VM_NAME}
else
{{ mise_bin }} run agentstack-cli:run -- platform stop --vm-name=${VM_NAME}
fi

rm -f "$CONFIG_FILE"

kill %1
exit $result
"""

["agentstack-server:test:e2e-examples"]
dir = "{{config_root}}/apps/agentstack-server"
usage = 'flag "--no-clean"'
run = """
#!/bin/bash
VM_NAME=e2e-examples-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack:stop-all
{{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME}
curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2
fi

CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml"

echo '
externalRegistries: null
ui:
enabled: false
auth:
enabled: true
docling:
enabled: true
connector:
presets:
- url: mcp+stdio://test
stdio:
image: mcp/aws-documentation
metadata:
name: Test MCP Server
keycloak:
auth:
seedAgentstackUsers:
- username: admin
password: admin
firstName: Admin
lastName: User
email: [email protected]
roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"

export DB_URL="postgresql+asyncpg://agentstack-user:password@localhost:5432/agentstack"
export LLM_API_BASE="${LLM_API_BASE:-http://host.docker.internal:11434/v1}"

echo "Waiting for agentstack-server deployment to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/agentstack-server

echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
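The new agentstack-server:test:e2e-examples task follows the same pattern as the existing e2e task above (its trailing diagnostics and cleanup block is elided here), but provisions a dedicated e2e-examples-test-run VM and restricts pytest to tests/e2e/examples, the directory the base task now ignores. For local iteration, the --no-clean flag declared in usage can be passed, e.g. mise run agentstack-server:test:e2e-examples --no-clean, to skip the upfront VM teardown.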
2 changes: 1 addition & 1 deletion apps/agentstack-server/tests/conftest.py
@@ -63,7 +63,7 @@ def pytest_configure(config):
async def _get_kr8s_client():
api = await kr8s.asyncio.api()
kubeconfig = api.auth.kubeconfig
kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|e2e-examples-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
if not re.match(kubeconfig_regex, str(kubeconfig.path)):
raise ValueError(
f"Preventing kubeconfig operations with invalid kubeconfig path.\n"
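The regex gains an e2e-examples-test-run alternative matching the VM_NAME used by the new mise task, so the kubeconfig-path safety guard no longer rejects the example suite's cluster.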
@@ -0,0 +1,57 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest

from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI
from agentstack_server.utils.a2a import get_extension
from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_configuration_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/agent-details/basic-configuration"

async with run_example(example_path, a2a_client_factory) as running_example:
agent_card = running_example.agent_card

with subtests.test("agent card has correct name"):
assert agent_card.name == "Example Research Assistant"

with subtests.test("agent card has two skills"):
assert agent_card.skills is not None
assert len(agent_card.skills) == 2

skill_ids = {skill.id for skill in agent_card.skills}
assert skill_ids == {"research", "summarization"}

with subtests.test("agent detail extension is configured"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
assert agent_detail is not None

params = agent_detail.model_dump()["params"]
assert params["interaction_mode"] == "multi-turn"
assert (
params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
)
assert params["framework"] == "BeeAI Framework"
assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

with subtests.test("agent detail has author info"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
params = agent_detail.model_dump()["params"]

assert params["author"]["name"] == "Agent Stack Team"
assert params["author"]["email"] == "[email protected]"

with subtests.test("agent detail has tools"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
params = agent_detail.model_dump()["params"]

tools = params["tools"]
assert len(tools) == 2

tool_names = {tool["name"] for tool in tools}
assert tool_names == {"Web Search", "Document Reader"}
Comment on lines +30 to +57
Contributor
medium

To improve readability and reduce code repetition, the agent_detail and params variables can be fetched once before the subtests that use them. This makes the test cleaner and more maintainable.

        agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
        assert agent_detail is not None
        params = agent_detail.model_dump()["params"]

        with subtests.test("agent detail extension is configured"):
            assert params["interaction_mode"] == "multi-turn"
            assert (
                params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
            )
            assert params["framework"] == "BeeAI Framework"
            assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

        with subtests.test("agent detail has author info"):
            assert params["author"]["name"] == "Agent Stack Team"
            assert params["author"]["email"] == "[email protected]"

        with subtests.test("agent detail has tools"):
            tools = params["tools"]
            assert len(tools) == 2

            tool_names = {tool["name"] for tool in tools}
            assert tool_names == {"Web Search", "Document Reader"}
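
For orientation, every new test imports run_example from tests/e2e/examples/conftest, which is not part of this diff. Below is a minimal sketch of the shape such a helper could take; the platform_deploy/platform_undeploy calls and the tuple returned by a2a_client_factory are assumptions, not the real implementation. Only the RunningExample fields the tests actually touch (agent_card, client, context) are taken from the diff.

    # Hypothetical sketch only; the real tests/e2e/examples/conftest.py is not in this diff.
    import contextlib
    from dataclasses import dataclass
    from typing import Any


    @dataclass
    class RunningExample:
        agent_card: Any  # a2a AgentCard of the deployed example agent
        client: Any      # a2a client bound to that agent
        context: Any     # conversation context; tests read its .id


    @contextlib.asynccontextmanager
    async def run_example(example_path: str, a2a_client_factory):
        """Deploy the agent under examples/<example_path>, yield a RunningExample,
        and tear the deployment down on exit."""
        agent = await platform_deploy(example_path)  # hypothetical helper, not the real API
        try:
            client, agent_card, context = await a2a_client_factory(agent)
            yield RunningExample(agent_card=agent_card, client=client, context=context)
        finally:
            await platform_undeploy(agent)  # hypothetical helper, not the real API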

@@ -0,0 +1,55 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.settings import SettingsExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_settings_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/agent-settings/basic-settings"

async with run_example(example_path, a2a_client_factory) as running_example:
settings_uri = SettingsExtensionSpec.URI

with subtests.test("agent responds based on enabled thinking setting"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
message.metadata = {
settings_uri: {
"values": {
"thinking_group": {
"type": "checkbox_group",
"values": {"thinking": {"value": True}},
}
}
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
assert "Thinking mode is enabled" in task.history[-1].parts[0].root.text

with subtests.test("agent responds based on disabled thinking setting"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
message.metadata = {
settings_uri: {
"values": {
"thinking_group": {
"type": "checkbox_group",
"values": {"thinking": {"value": False}},
}
}
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
assert "Thinking mode is disabled" in task.history[-1].parts[0].root.text
@@ -0,0 +1,65 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.canvas import CanvasExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_canvas_with_llm_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/canvas/canvas-with-llm"

async with run_example(example_path, a2a_client_factory) as running_example:
canvas_uri = CanvasExtensionSpec.URI

with subtests.test("agent generates code artifact"):
message = create_text_message_object(content="Write a hello world program")
message.context_id = running_example.context.id
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify artifact is returned (the agent uses a mocked LLM response)
assert len(task.artifacts) > 0
artifact = task.artifacts[0]
assert artifact.name == "Response"

# Verify the artifact contains the expected mock response
artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))
assert "Hello from LLM!" in artifact_text

with subtests.test("agent updates artifact via canvas edit"):
# Use the artifact from the previous test
artifact_id = artifact.artifact_id

# Get the artifact text to determine indices
artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))

# Send edit request for a portion of the code
message = create_text_message_object(content="Change print to use f-string")
message.context_id = running_example.context.id
message.metadata = {
canvas_uri: {
"artifact_id": artifact_id,
"start_index": 0,
"end_index": min(50, len(artifact_text)), # Select first 50 chars
"description": "Change print to use f-string",
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify updated artifact is returned
assert len(task.artifacts) > 0
updated_artifact = task.artifacts[0]

# Verify the response contains the edit prompt context
updated_text = "".join(part.root.text for part in updated_artifact.parts if hasattr(part.root, "text"))
assert "editing existing code" in updated_text.lower() or "selected" in updated_text.lower()
@@ -0,0 +1,40 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions import CitationExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_citation_basic_usage_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/citations/citation-basic-usage"

async with run_example(example_path, a2a_client_factory) as running_example:
with subtests.test("agent responds with text and citation metadata"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify response text
response_text = task.history[-1].parts[0].root.text
assert "Python is the most popular programming language" in response_text

# Verify citation metadata exists
citation_uri = CitationExtensionSpec.URI
response_metadata = task.history[-1].metadata
assert response_metadata is not None
assert citation_uri in response_metadata

# Verify citation content
citations = response_metadata[citation_uri]["citations"]
assert len(citations) == 1
assert citations[0]["url"] == "https://survey.stackoverflow.com/2023"
assert citations[0]["title"] == "Stack Overflow Developer Survey 2023"