-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathservice.yaml
More file actions
207 lines (205 loc) · 8.38 KB
/
service.yaml
File metadata and controls
207 lines (205 loc) · 8.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Cloud Run Service Configuration
# Declarative configuration for the Lightspeed Agent service
#
# This deploys the agent with the MCP server as a sidecar container.
# The MCP server provides tools for interacting with Red Hat Insights APIs.
#
# Usage:
# gcloud run services replace deploy/cloudrun/service.yaml \
# --region=us-central1 --project=YOUR_PROJECT_ID
#
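# Note: Replace every ${...} placeholder (SERVICE_NAME, PROJECT_ID, REGION,
# DB_INSTANCE_NAME, VPC_CONNECTOR_NAME, SERVICE_ACCOUNT_NAME, MCP_IMAGE) with
# actual values before applying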
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: "${SERVICE_NAME}"
  labels:
    app: "${SERVICE_NAME}"
    managed-by: cloud-build
  annotations:
    run.googleapis.com/description: "Red Hat Lightspeed Agent for Google Cloud - A2A-ready agent using Google ADK"
    run.googleapis.com/ingress: all
    run.googleapis.com/launch-stage: GA
spec:
  template:
    metadata:
      annotations:
        # Scaling configuration (values are strings, as annotations must be)
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "2"
        # CPU allocation
        run.googleapis.com/cpu-throttling: "false"
        # Startup CPU boost
        run.googleapis.com/startup-cpu-boost: "true"
        # Cloud SQL connection
        run.googleapis.com/cloudsql-instances: "${PROJECT_ID}:${REGION}:${DB_INSTANCE_NAME}"
        # VPC connector for Cloud Memorystore Redis (rate limiting)
        run.googleapis.com/vpc-access-connector: "projects/${PROJECT_ID}/locations/${REGION}/connectors/${VPC_CONNECTOR_NAME}"
        run.googleapis.com/vpc-access-egress: private-ranges-only
    spec:
      containerConcurrency: 80
      timeoutSeconds: 300
      serviceAccountName: "${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
      containers:
        # =====================================================================
        # Lightspeed Agent (main container - the only one exposing a port)
        # =====================================================================
        - name: lightspeed-agent
          image: "gcr.io/${PROJECT_ID}/lightspeed-agent:latest"
          ports:
            - name: http1
              # Must match AGENT_PORT and the health-check ports below
              containerPort: 8000
          resources:
            limits:
              cpu: "2"
              memory: 2Gi
          env:
            # Google AI Configuration
            - name: GOOGLE_GENAI_USE_VERTEXAI
              value: "TRUE"
            - name: GOOGLE_CLOUD_PROJECT
              value: "${PROJECT_ID}"
            - name: GOOGLE_CLOUD_LOCATION
              value: "${REGION}"
            - name: GEMINI_MODEL
              value: "gemini-2.5-flash"
            # Agent Configuration
            - name: AGENT_HOST
              value: "0.0.0.0"
            - name: AGENT_PORT
              value: "8000"
            - name: AGENT_NAME
              value: "lightspeed_agent"
            - name: AGENT_PROVIDER_URL
              value: "https://lightspeed-agent.example.com"
            # Agent provider's organization website URL.
            # Used in AgentCard provider.url and as the expected JWT audience
            # for Google DCR software_statement validation.
            # Must match the aud claim Google sends in the software_statement.
            - name: AGENT_PROVIDER_ORGANIZATION_URL
              value: "https://www.redhat.com"
            # Marketplace handler URL for DCR endpoints in the AgentCard.
            # Must point to the marketplace-handler Cloud Run service.
            # Updated automatically by deploy.sh after deployment.
            - name: MARKETPLACE_HANDLER_URL
              value: "https://marketplace-handler.example.com"
            # Logging and Audit Trail
            # LOG_FORMAT=json enables structured audit logging with user_id,
            # org_id, order_id, and request_id in every log record. Cloud
            # Logging parses these fields automatically for querying.
            - name: LOG_LEVEL
              value: "INFO"
            - name: LOG_FORMAT
              value: "json"
            - name: AGENT_LOGGING_DETAIL
              value: "basic"
            # Red Hat SSO Configuration
            - name: RED_HAT_SSO_ISSUER
              value: "https://sso.redhat.com/auth/realms/redhat-external"
            # Comma-separated OAuth scopes required in access tokens (checked via introspection).
            # Set to empty string to disable scope checking.
            - name: AGENT_REQUIRED_SCOPE
              value: "api.console,api.ocm"
            # Comma-separated allowlist of OAuth scopes permitted in access tokens.
            # Tokens carrying scopes outside this list are rejected (HTTP 403).
            - name: AGENT_ALLOWED_SCOPES
              value: "openid,profile,email,api.console,api.ocm"
            # Ensure production environments do not skip JWT validation
            - name: SKIP_JWT_VALIDATION
              value: "false"
            # MCP Configuration
            # The agent connects to the MCP sidecar container over HTTP
            - name: MCP_TRANSPORT_MODE
              value: "http"
            # Port must match the --port argument of the insights-mcp container
            - name: MCP_SERVER_URL
              value: "http://localhost:8080"
            - name: MCP_READ_ONLY
              value: "true"
            # Secrets from Secret Manager
            - name: RED_HAT_SSO_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: redhat-sso-client-id
                  key: latest
            - name: RED_HAT_SSO_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: redhat-sso-client-secret
                  key: latest
            # Marketplace Database (shared with handler for order validation)
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-url
                  key: latest
            # Session backend: "database" for production persistence,
            # "memory" for in-memory (sessions lost on restart)
            # NOTE(review): "memory" is selected here even though maxScale is
            # "2" and minScale is "0"; presumably sessions are then kept per
            # instance and dropped on scale-to-zero - confirm this is intended.
            - name: SESSION_BACKEND
              value: "memory"
            # Session Database (required when SESSION_BACKEND=database)
            - name: SESSION_DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: session-database-url
                  key: latest
            # Rate Limiting (Redis-backed, required for API Server)
            - name: RATE_LIMIT_REDIS_URL
              valueFrom:
                secretKeyRef:
                  name: rate-limit-redis-url
                  key: latest
            - name: RATE_LIMIT_REDIS_TIMEOUT_MS
              value: "200"
            - name: RATE_LIMIT_KEY_PREFIX
              value: "lightspeed:ratelimit"
            - name: RATE_LIMIT_REQUESTS_PER_MINUTE
              value: "60"
            - name: RATE_LIMIT_REQUESTS_PER_HOUR
              value: "1000"
            # Optional: max MCP tool executions per agent run (0 = unlimited).
            # Counter is in-memory per Cloud Run instance; see docs/metering.md.
            - name: MAX_TOOL_CALLS_PER_INVOCATION
              value: "0"
          # Health checks (both hit /health on the agent's container port)
          startupProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            periodSeconds: 30
            failureThreshold: 3
        # =====================================================================
        # Red Hat Lightspeed MCP Server (Sidecar)
        # =====================================================================
        # Provides MCP tools for interacting with Red Hat Insights APIs
        # (Advisor, Inventory, Vulnerability, Remediations, etc.)
        #
        # Authentication: The agent forwards the caller's JWT token to the
        # MCP server via the Authorization header. The MCP server uses this
        # token to authenticate with console.redhat.com on behalf of the user.
        #
        # Sidecar container - no ports section (only main container can expose port)
        #
        # Note: Cloud Run doesn't support Quay.io directly. Copy the image to GCR:
        #   docker pull quay.io/redhat-services-prod/insights-management-tenant/insights-mcp/red-hat-lightspeed-mcp:latest
        #   docker tag quay.io/.../red-hat-lightspeed-mcp:latest gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest
        #   docker push gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest
        - name: insights-mcp
          image: "${MCP_IMAGE}"
          # Listens on 8080, the port the agent targets via MCP_SERVER_URL
          args:
            - "--readonly"
            - "http"
            - "--port"
            - "8080"
            - "--host"
            - "0.0.0.0"
          resources:
            limits:
              cpu: "1"
              memory: 512Mi
  # Route all traffic to the latest revision
  traffic:
    - percent: 100
      latestRevision: true