57 changes: 57 additions & 0 deletions .github/workflows/e2e-examples-test.yml
@@ -0,0 +1,57 @@
name: e2e-examples-test

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

on:
workflow_dispatch: { }
pull_request:
types: [labeled, synchronize]
push:
branches:
- main
paths:
- 'apps/agentstack-server/**'
- 'apps/agentstack-sdk-py/**'
- 'examples/**'
- 'helm/**'
jobs:
e2e-examples-test:
if: >-
github.event_name != 'pull_request' ||
contains(github.event.pull_request.labels.*.name, 'e2e-examples')
timeout-minutes: 25
runs-on: ubuntu-latest
env:
AGENTSTACK__HOME: ${{ github.workspace }}/.agentstack
steps:
- uses: actions/checkout@v4
- name: Maximize build space
uses: ./.github/actions/maximize-build-space
with:
root-reserve-mb: 15360
temp-reserve-mb: 2048
swap-size-mb: 1024
remove-dotnet: 'true'
- name: "Set up Lima"
uses: lima-vm/lima-actions/setup@v1
id: lima-actions-setup
- name: "Cache ~/.cache/lima"
uses: actions/cache@v4
with:
path: ~/.cache/lima
key: lima-${{ steps.lima-actions-setup.outputs.version }}
- uses: actions/checkout@v4
- uses: ./.github/actions/setup
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: mise run agentstack-server:test:e2e-examples
env:
LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
- run: uv cache prune --ci
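
As configured, the job runs on pushes to main that touch the listed paths, on manual workflow_dispatch, and on pull requests only when the PR carries the e2e-examples label (the labeled and synchronize triggers re-evaluate the if condition). One way to opt a PR in is the GitHub CLI, e.g. gh pr edit <number> --add-label e2e-examples.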
85 changes: 84 additions & 1 deletion apps/agentstack-server/tasks.toml
@@ -319,7 +319,90 @@ echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e
uv run pytest -m e2e --ignore=tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
echo "Tests failed. Checking pod status..."
echo "------------- pods --------------"
kubectl get pod
echo "------------ events -------------"
kubectl get event
fi

if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack-cli:run -- platform delete --vm-name=${VM_NAME}
else
{{ mise_bin }} run agentstack-cli:run -- platform stop --vm-name=${VM_NAME}
fi

rm -f "$CONFIG_FILE"

kill %1
exit $result
"""

["agentstack-server:test:e2e-examples"]
dir = "{{config_root}}/apps/agentstack-server"
usage = 'flag "--no-clean"'
run = """
#!/bin/bash
VM_NAME=e2e-examples-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack:stop-all
{{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME}
curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2
fi

CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml"

echo '
externalRegistries: null
ui:
enabled: false
auth:
enabled: true
docling:
enabled: true
connector:
presets:
- url: mcp+stdio://test
stdio:
image: mcp/aws-documentation
metadata:
name: Test MCP Server
keycloak:
auth:
seedAgentstackUsers:
- username: admin
password: admin
firstName: Admin
lastName: User
email: [email protected]
roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"

export DB_URL="postgresql+asyncpg://agentstack-user:password@localhost:5432/agentstack"
export LLM_API_BASE="${LLM_API_BASE:-http://host.docker.internal:11434/v1}"

echo "Waiting for agentstack-server deployment to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/agentstack-server

echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
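The new agentstack-server:test:e2e-examples task follows the same pattern as the existing e2e task above (its trailing diagnostics and cleanup block is elided here), but provisions a dedicated e2e-examples-test-run VM and restricts pytest to tests/e2e/examples, the directory the base task now ignores. For local iteration, the --no-clean flag declared in usage can be passed, e.g. mise run agentstack-server:test:e2e-examples --no-clean, to skip the upfront VM teardown.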
2 changes: 1 addition & 1 deletion apps/agentstack-server/tests/conftest.py
@@ -63,7 +63,7 @@ def pytest_configure(config):
async def _get_kr8s_client():
api = await kr8s.asyncio.api()
kubeconfig = api.auth.kubeconfig
kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|e2e-examples-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
if not re.match(kubeconfig_regex, str(kubeconfig.path)):
raise ValueError(
f"Preventing kubeconfig operations with invalid kubeconfig path.\n"
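The regex gains an e2e-examples-test-run alternative matching the VM_NAME used by the new mise task, so the kubeconfig-path safety guard no longer rejects the example suite's cluster.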
@@ -0,0 +1,57 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest

from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI
from agentstack_server.utils.a2a import get_extension
from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_configuration_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/agent-details/basic-configuration"

async with run_example(example_path, a2a_client_factory) as running_example:
agent_card = running_example.agent_card

with subtests.test("agent card has correct name"):
assert agent_card.name == "Example Research Assistant"

with subtests.test("agent card has two skills"):
assert agent_card.skills is not None
assert len(agent_card.skills) == 2

skill_ids = {skill.id for skill in agent_card.skills}
assert skill_ids == {"research", "summarization"}

with subtests.test("agent detail extension is configured"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
assert agent_detail is not None

params = agent_detail.model_dump()["params"]
assert params["interaction_mode"] == "multi-turn"
assert (
params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
)
assert params["framework"] == "BeeAI Framework"
assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

with subtests.test("agent detail has author info"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
params = agent_detail.model_dump()["params"]

assert params["author"]["name"] == "Agent Stack Team"
assert params["author"]["email"] == "[email protected]"

with subtests.test("agent detail has tools"):
agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
params = agent_detail.model_dump()["params"]

tools = params["tools"]
assert len(tools) == 2

tool_names = {tool["name"] for tool in tools}
assert tool_names == {"Web Search", "Document Reader"}
Comment on lines +30 to +57
Contributor
medium

To improve readability and reduce code repetition, the agent_detail and params variables can be fetched once before the subtests that use them. This makes the test cleaner and more maintainable.

        agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
        assert agent_detail is not None
        params = agent_detail.model_dump()["params"]

        with subtests.test("agent detail extension is configured"):
            assert params["interaction_mode"] == "multi-turn"
            assert (
                params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
            )
            assert params["framework"] == "BeeAI Framework"
            assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

        with subtests.test("agent detail has author info"):
            assert params["author"]["name"] == "Agent Stack Team"
            assert params["author"]["email"] == "[email protected]"

        with subtests.test("agent detail has tools"):
            tools = params["tools"]
            assert len(tools) == 2

            tool_names = {tool["name"] for tool in tools}
            assert tool_names == {"Web Search", "Document Reader"}
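
For orientation, every new test imports run_example from tests/e2e/examples/conftest, which is not part of this diff. Below is a minimal sketch of the shape such a helper could take; the platform_deploy/platform_undeploy calls and the tuple returned by a2a_client_factory are assumptions, not the real implementation. Only the RunningExample fields the tests actually touch (agent_card, client, context) are taken from the diff.

    # Hypothetical sketch only; the real tests/e2e/examples/conftest.py is not in this diff.
    import contextlib
    from dataclasses import dataclass
    from typing import Any


    @dataclass
    class RunningExample:
        agent_card: Any  # a2a AgentCard of the deployed example agent
        client: Any      # a2a client bound to that agent
        context: Any     # conversation context; tests read its .id


    @contextlib.asynccontextmanager
    async def run_example(example_path: str, a2a_client_factory):
        """Deploy the agent under examples/<example_path>, yield a RunningExample,
        and tear the deployment down on exit."""
        agent = await platform_deploy(example_path)  # hypothetical helper, not the real API
        try:
            client, agent_card, context = await a2a_client_factory(agent)
            yield RunningExample(agent_card=agent_card, client=client, context=context)
        finally:
            await platform_undeploy(agent)  # hypothetical helper, not the real API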

@@ -0,0 +1,55 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.settings import SettingsExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_settings_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/agent-settings/basic-settings"

async with run_example(example_path, a2a_client_factory) as running_example:
settings_uri = SettingsExtensionSpec.URI

with subtests.test("agent responds based on enabled thinking setting"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
message.metadata = {
settings_uri: {
"values": {
"thinking_group": {
"type": "checkbox_group",
"values": {"thinking": {"value": True}},
}
}
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
assert "Thinking mode is enabled" in task.history[-1].parts[0].root.text

with subtests.test("agent responds based on disabled thinking setting"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
message.metadata = {
settings_uri: {
"values": {
"thinking_group": {
"type": "checkbox_group",
"values": {"thinking": {"value": False}},
}
}
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
assert "Thinking mode is disabled" in task.history[-1].parts[0].root.text
@@ -0,0 +1,65 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.canvas import CanvasExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_canvas_with_llm_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/canvas/canvas-with-llm"

async with run_example(example_path, a2a_client_factory) as running_example:
canvas_uri = CanvasExtensionSpec.URI

with subtests.test("agent generates code artifact"):
message = create_text_message_object(content="Write a hello world program")
message.context_id = running_example.context.id
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify artifact is returned (the agent uses a mocked LLM response)
assert len(task.artifacts) > 0
artifact = task.artifacts[0]
assert artifact.name == "Response"

# Verify the artifact contains the expected mock response
artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))
assert "Hello from LLM!" in artifact_text

with subtests.test("agent updates artifact via canvas edit"):
# Use the artifact from the previous test
artifact_id = artifact.artifact_id

# Get the artifact text to determine indices
artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))

# Send edit request for a portion of the code
message = create_text_message_object(content="Change print to use f-string")
message.context_id = running_example.context.id
message.metadata = {
canvas_uri: {
"artifact_id": artifact_id,
"start_index": 0,
"end_index": min(50, len(artifact_text)), # Select first 50 chars
"description": "Change print to use f-string",
}
}
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify updated artifact is returned
assert len(task.artifacts) > 0
updated_artifact = task.artifacts[0]

# Verify the response contains the edit prompt context
updated_text = "".join(part.root.text for part in updated_artifact.parts if hasattr(part.root, "text"))
assert "editing existing code" in updated_text.lower() or "selected" in updated_text.lower()
@@ -0,0 +1,40 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions import CitationExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_citation_basic_usage_example(subtests, get_final_task_from_stream, a2a_client_factory):
example_path = "agent-integration/citations/citation-basic-usage"

async with run_example(example_path, a2a_client_factory) as running_example:
with subtests.test("agent responds with text and citation metadata"):
message = create_text_message_object(content="Hello")
message.context_id = running_example.context.id
task = await get_final_task_from_stream(running_example.client.send_message(message))

assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

# Verify response text
response_text = task.history[-1].parts[0].root.text
assert "Python is the most popular programming language" in response_text

# Verify citation metadata exists
citation_uri = CitationExtensionSpec.URI
response_metadata = task.history[-1].metadata
assert response_metadata is not None
assert citation_uri in response_metadata

# Verify citation content
citations = response_metadata[citation_uri]["citations"]
assert len(citations) == 1
assert citations[0]["url"] == "https://survey.stackoverflow.com/2023"
assert citations[0]["title"] == "Stack Overflow Developer Survey 2023"