RHEcosystemAppEng
diff --git a/‎docs/rate-limiting.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/rate-limiting.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/lightspeed_agent/api/a2a/a2a_setup.py‎
Lines changed: 7 additions & 1 deletion b/‎src/lightspeed_agent/api/a2a/a2a_setup.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎src/lightspeed_agent/api/a2a/agent_card.py‎
Lines changed: 2 additions & 2 deletions b/‎src/lightspeed_agent/api/a2a/agent_card.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/lightspeed_agent/api/a2a/response_formatter_plugin.py‎
Lines changed: 82 additions & 0 deletions b/‎src/lightspeed_agent/api/a2a/response_formatter_plugin.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎src/lightspeed_agent/api/app.py‎
Lines changed: 28 additions & 3 deletions b/‎src/lightspeed_agent/api/app.py‎
Lines changed: 28 additions & 3 deletions
@@ -182,4 +182,5 @@ Rate limiting happens **before** the request is processed (at the middleware lay
 
 - Rate limits are enforced across replicas as long as they share the same Redis instance.
 - The service verifies Redis connectivity at startup and fails fast when Redis is unavailable.
+- **Fail-open behaviour**: If Redis becomes unreachable at runtime, requests are allowed through without rate limiting and a warning is logged. This prevents a Redis outage from causing a self-inflicted denial of service.
 - **In-transit encryption (TLS)**: Cloud Memorystore instances are created with `--transit-encryption-mode=SERVER_AUTHENTICATION`. Use the `rediss://` URL scheme and set `RATE_LIMIT_REDIS_CA_CERT` to the path of the mounted server CA certificate. See [Cloud Run Deployment — Redis Setup](../deploy/cloudrun/README.md#4-redis-setup-for-rate-limiting) for setup instructions.
@@ -37,6 +37,8 @@ dependencies = [
     # Google Cloud authentication (ADC for Procurement API calls)
     "google-auth>=2.0.0",
     "requests>=2.20.0",  # Required by google.auth.transport.requests
+    # Redis for distributed rate limiting (used by both agent and marketplace handler)
+    "redis>=5.0.0",
 ]
 
 [project.optional-dependencies]
@@ -49,8 +51,6 @@ agent = [
     "python-dotenv>=1.0.0",
     # Google Cloud integration
     "google-cloud-service-control>=1.0.0",
-    # Redis for distributed rate limiting
-    "redis>=5.0.0",
     # OpenTelemetry for distributed tracing
     "opentelemetry-api>=1.20.0",
     "opentelemetry-sdk>=1.20.0",
 
@@ -24,6 +24,7 @@
 from lightspeed_agent.api.a2a.agent_card import build_agent_card
 from lightspeed_agent.api.a2a.logging_plugin import AgentLoggingPlugin
 from lightspeed_agent.api.a2a.mcp_output_size_guard_plugin import MCPOutputSizeGuardPlugin
+from lightspeed_agent.api.a2a.response_formatter_plugin import ResponseFormatterPlugin
 from lightspeed_agent.api.a2a.usage_plugin import UsageTrackingPlugin
 from lightspeed_agent.config import get_settings
 from lightspeed_agent.core import create_agent
@@ -120,7 +121,12 @@ def _create_runner() -> Runner:
     app = App(
         name=settings.agent_name,
         root_agent=agent,
-        plugins=[AgentLoggingPlugin(), UsageTrackingPlugin(), MCPOutputSizeGuardPlugin()],
+        plugins=[
+            AgentLoggingPlugin(),
+            UsageTrackingPlugin(),
+            MCPOutputSizeGuardPlugin(),
+            ResponseFormatterPlugin(),
+        ],
     )
 
     # Use database-backed session service for production
 
@@ -179,8 +179,8 @@ def build_agent_card() -> AgentCard:
         security=[
             {"redhat_sso": ["openid", "api.console", "api.ocm"]},
         ],
-        default_input_modes=["text"],
-        default_output_modes=["text"],
+        default_input_modes=["text/plain"],
+        default_output_modes=["text/plain"],
     )
 
     return agent_card
 
@@ -0,0 +1,82 @@
+"""Response formatter plugin.
+
+Injects the first-response legal notice and the AI-content disclaimer
+footer at the application layer so the LLM does not need to track
+conversation state or remember to include verbatim boilerplate.
+
+- The notice is prepended to the first final text response in each session.
+- The footer is appended to every final text response.
+"""
+
+import logging
+
+from google.adk.agents.invocation_context import InvocationContext
+from google.adk.events.event import Event
+from google.adk.plugins.base_plugin import BasePlugin
+
+logger = logging.getLogger(__name__)
+
+FIRST_RESPONSE_NOTICE = (
+    "You are interacting with the Red Hat Lightspeed Agent, which can answer questions "
+    "about your Red Hat account, subscription, system configuration, and related details. "
+    "This feature uses AI technology. Interactions may be used to improve Red Hat's "
+    "products or services.\n\n"
+    "Always review AI-generated content prior to use.\n\n"
+)
+
+RESPONSE_FOOTER = "\n\n---\n*Always review AI-generated content prior to use.*"
+
+
+class ResponseFormatterPlugin(BasePlugin):
+    """ADK plugin that adds FIRST_RESPONSE_NOTICE and RESPONSE_FOOTER to final text responses."""
+
+    def __init__(self) -> None:
+        super().__init__(name="response_formatter")
+
+    async def on_event_callback(
+        self, *, invocation_context: InvocationContext, event: Event
+    ) -> Event | None:
+        """Add the notice/footer to a final text response; return None for any other event."""
+        if not event.is_final_response():
+            return None
+
+        if not event.content or not event.content.parts:
+            return None
+
+        # Find the first and last parts with non-empty text; notice goes first, footer last
+        first_text_idx: int | None = None
+        last_text_idx: int | None = None
+        for i, part in enumerate(event.content.parts):
+            if part.text:
+                if first_text_idx is None:
+                    first_text_idx = i
+                last_text_idx = i
+
+        if first_text_idx is None or last_text_idx is None:
+            return None
+
+        # Prepend the notice only when no earlier non-user session event carried text
+        if self._is_first_agent_response(invocation_context.session.events):
+            first_text = event.content.parts[first_text_idx].text or ""
+            event.content.parts[first_text_idx].text = (
+                FIRST_RESPONSE_NOTICE + first_text
+            )
+            logger.debug("Prepended first-response notice to agent response")
+
+        # Append the disclaimer footer to the last text part of every final response
+        last_text = event.content.parts[last_text_idx].text or ""
+        event.content.parts[last_text_idx].text = last_text + RESPONSE_FOOTER
+
+        return event
+
+    @staticmethod
+    def _is_first_agent_response(session_events: list[Event]) -> bool:
+        """Return True when no non-user event in session_events has a part with non-empty text."""
+        for ev in session_events:
+            if ev.author == "user":
+                continue
+            if ev.content and ev.content.parts:
+                for part in ev.content.parts:
+                    if part.text:
+                        return False
+        return True
@@ -9,12 +9,15 @@
 """
 
 import logging
+import pathlib
 from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from typing import Any
 
 from a2a.server.apps.jsonrpc.fastapi_app import A2AFastAPI
+from fastapi import Request
 from fastapi.middleware.cors import CORSMiddleware
+from starlette.responses import FileResponse
 
 from lightspeed_agent.api.a2a.a2a_setup import setup_a2a_routes
 from lightspeed_agent.api.a2a.agent_card import get_agent_card_dict
@@ -24,9 +27,18 @@
 from lightspeed_agent.ratelimit import RateLimitMiddleware, get_redis_rate_limiter
 from lightspeed_agent.security import RequestBodyLimitMiddleware, SecurityHeadersMiddleware
 
+_LOGO_PATH = pathlib.Path(__file__).parent.parent / "static" / "logo.png"
+
 logger = logging.getLogger(__name__)
 
 
+def _agent_card_response(request: Request) -> dict[str, Any]:
+    """Return the agent card dict with "iconUrl" set to <request base URL>/static/logo.png."""
+    card = get_agent_card_dict()
+    icon_url = f"{str(request.base_url).rstrip('/')}/static/logo.png"
+    return {**card, "iconUrl": icon_url}
+
+
 @asynccontextmanager
 async def lifespan(app: A2AFastAPI) -> AsyncIterator[None]:
     """Application lifespan manager for startup/shutdown events."""
@@ -134,19 +146,32 @@ def create_app() -> A2AFastAPI:
         lifespan=lifespan,
     )
 
+    # Serve the Red Hat logo for the agent card iconUrl
+    @app.get("/static/logo.png")
+    async def serve_logo() -> FileResponse:
+        """Serve the agent logo image."""
+        return FileResponse(_LOGO_PATH, media_type="image/png")
+
+    # Custom agent card endpoint registered BEFORE setup_a2a_routes so
+    # FastAPI's first-match routing picks it up instead of the SDK default.
+    @app.get("/.well-known/agent.json")
+    async def agent_card_with_icon(request: Request) -> dict[str, Any]:
+        """AgentCard endpoint with dynamic iconUrl."""
+        return _agent_card_response(request)
+
     # Set up A2A protocol routes using ADK's built-in integration
     # This provides:
-    # - GET /.well-known/agent.json - AgentCard
+    # - GET /.well-known/agent.json - AgentCard (overridden above)
     # - POST / - JSON-RPC 2.0 endpoint for message/send, message/stream, etc.
     # The ADK integration handles SSE streaming, task management, and
     # event conversion automatically.
     setup_a2a_routes(app)
 
     # Alias for agent card (some clients use agent-card.json)
     @app.get("/.well-known/agent-card.json")
-    async def agent_card_alias() -> dict[str, Any]:
+    async def agent_card_alias(request: Request) -> dict[str, Any]:
         """AgentCard endpoint (alias for agent.json)."""
-        return get_agent_card_dict()
+        return _agent_card_response(request)
 
     # Add authentication middleware for A2A endpoint (innermost layer)
     # Validates Red Hat SSO JWT tokens on POST / requests