Skip to content

Commit 6ed625f

Browse files
pree-dew and rdimitrov authored
464 logs infra (#478)
## Motivation and Context
Logs will help in fixing issues quickly, thereby reducing downtime and MTTD.

## How Has This Been Tested?
The end-to-end local setup was tested.

## Breaking Changes
No

## Types of changes
<!-- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update

## Checklist
<!-- Go over all the following points, and put an `x` in all the boxes that apply. -->
- [ ] I have read the [MCP Documentation](https://modelcontextprotocol.io)
- [x] My code follows the repository's style guidelines
- [x] New and existing tests pass locally
- [ ] I have added appropriate error handling
- [ ] I have added or updated documentation as needed

## Additional context
- It handles container logs for mcp-registry pods.
- It supports k8sattributes resource attributes, which help identify where the logs are coming from.
- It supports VictoriaLogs and its Grafana plugin.

<img width="1129" height="383" alt="Screenshot 2025-09-15 at 1 43 51 AM" src="https://github.com/user-attachments/assets/a560c541-721d-415c-a0a8-8cbe008c3af6" />
<img width="1134" height="703" alt="Screenshot 2025-09-15 at 1 44 34 AM" src="https://github.com/user-attachments/assets/8b5b4132-9fc3-4b8e-8027-27c7787070f1" />
<img width="1134" height="727" alt="Screenshot 2025-09-15 at 1 46 10 AM" src="https://github.com/user-attachments/assets/bcdb4d9d-cea1-4d88-8dd5-fa4cdb51031f" />

---------

Co-authored-by: Radoslav Dimitrov <[email protected]>
1 parent 2280e4d commit 6ed625f

File tree

1 file changed

+284
-1
lines changed

1 file changed

+284
-1
lines changed

deploy/pkg/k8s/monitoring.go

Lines changed: 284 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,281 @@ func DeployMonitoringStack(ctx *pulumi.Context, cluster *providers.ProviderInfo,
105105
return err
106106
}
107107

108+
// Deploy VictoriaLogs for log storage
109+
err = deployVictoriaLogs(ctx, cluster, ns, environment)
110+
if err != nil {
111+
return err
112+
}
113+
114+
// Deploy OpenTelemetry Collector DaemonSet
115+
err = deployOtelCollectorDaemonSet(ctx, cluster, ns, environment)
116+
if err != nil {
117+
return err
118+
}
119+
108120
// Deploy Grafana
109121
return deployGrafana(ctx, cluster, ns, environment, ingressNginx)
110122
}
111123

124+
// deployVictoriaLogs deploys VictoriaLogs for log storage
125+
func deployVictoriaLogs(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *corev1.Namespace, environment string) error {
126+
// Deploy VictoriaLogs using Helm chart
127+
_, err := helm.NewChart(ctx, "victoria-logs", helm.ChartArgs{
128+
Chart: pulumi.String("victoria-logs-single"),
129+
Version: pulumi.String("0.11.8"),
130+
Namespace: ns.Metadata.Name().Elem(),
131+
FetchArgs: helm.FetchArgs{
132+
Repo: pulumi.String("https://victoriametrics.github.io/helm-charts/"),
133+
},
134+
Values: pulumi.Map{
135+
"server": pulumi.Map{
136+
"retentionPeriod": pulumi.String("15d"),
137+
"resources": pulumi.Map{
138+
"requests": pulumi.Map{
139+
"memory": pulumi.String("256Mi"),
140+
"cpu": pulumi.String("100m"),
141+
},
142+
"limits": pulumi.Map{
143+
"memory": pulumi.String("2Gi"),
144+
"cpu": pulumi.String("1000m"),
145+
},
146+
},
147+
"persistence": pulumi.Map{
148+
"enabled": pulumi.Bool(true),
149+
"size": pulumi.String("20Gi"),
150+
},
151+
},
152+
},
153+
}, pulumi.Provider(cluster.Provider))
154+
if err != nil {
155+
return err
156+
}
157+
158+
return nil
159+
}
160+
161+
// deployOtelCollectorDaemonSet deploys OpenTelemetry Collector using Helm chart
162+
func deployOtelCollectorDaemonSet(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *corev1.Namespace, environment string) error {
163+
// Deploy OpenTelemetry Collector using Helm chart
164+
_, err := helm.NewChart(ctx, "opentelemetry-collector", helm.ChartArgs{
165+
Chart: pulumi.String("opentelemetry-collector"),
166+
Version: pulumi.String("0.133.0"),
167+
Namespace: ns.Metadata.Name().Elem(),
168+
FetchArgs: helm.FetchArgs{
169+
Repo: pulumi.String("https://open-telemetry.github.io/opentelemetry-helm-charts"),
170+
},
171+
Values: pulumi.Map{
172+
"mode": pulumi.String("daemonset"),
173+
"image": pulumi.Map{
174+
"repository": pulumi.String("otel/opentelemetry-collector-contrib"),
175+
"tag": pulumi.String("0.133.0"),
176+
},
177+
"clusterRole": pulumi.Map{
178+
"create": pulumi.Bool(true),
179+
"rules": pulumi.Array{
180+
pulumi.Map{
181+
"apiGroups": pulumi.StringArray{pulumi.String("")},
182+
"resources": pulumi.StringArray{
183+
pulumi.String("pods"),
184+
pulumi.String("pods/log"),
185+
pulumi.String("nodes"),
186+
pulumi.String("namespaces"),
187+
},
188+
"verbs": pulumi.StringArray{
189+
pulumi.String("get"),
190+
pulumi.String("list"),
191+
pulumi.String("watch"),
192+
},
193+
},
194+
},
195+
},
196+
"config": pulumi.Map{
197+
"receivers": pulumi.Map{
198+
"filelog": pulumi.Map{
199+
"include": pulumi.StringArray{pulumi.String("/var/log/pods/default_mcp-registry*/*/*.log")},
200+
"exclude": pulumi.StringArray{pulumi.String("/var/log/pods/*/*-collector-*/*.log")},
201+
"start_at": pulumi.String("end"),
202+
"include_file_path": pulumi.Bool(true),
203+
"include_file_name": pulumi.Bool(false),
204+
"operators": pulumi.Array{
205+
pulumi.Map{
206+
"type": pulumi.String("regex_parser"),
207+
"id": pulumi.String("extract_metadata_from_filepath"),
208+
"regex": pulumi.String(`^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]{36})\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log`),
209+
"parse_from": pulumi.String("attributes[\"log.file.path\"]"),
210+
"cache": pulumi.Map{
211+
"size": pulumi.Int(128),
212+
},
213+
},
214+
pulumi.Map{
215+
"type": pulumi.String("move"),
216+
"from": pulumi.String("attributes.container_name"),
217+
"to": pulumi.String("resource[\"k8s.container.name\"]"),
218+
},
219+
pulumi.Map{
220+
"type": pulumi.String("move"),
221+
"from": pulumi.String("attributes.namespace"),
222+
"to": pulumi.String("resource[\"k8s.namespace.name\"]"),
223+
},
224+
pulumi.Map{
225+
"type": pulumi.String("move"),
226+
"from": pulumi.String("attributes.pod_name"),
227+
"to": pulumi.String("resource[\"k8s.pod.name\"]"),
228+
},
229+
pulumi.Map{
230+
"type": pulumi.String("move"),
231+
"from": pulumi.String("attributes.restart_count"),
232+
"to": pulumi.String("resource[\"k8s.container.restart_count\"]"),
233+
},
234+
pulumi.Map{
235+
"type": pulumi.String("move"),
236+
"from": pulumi.String("attributes.uid"),
237+
"to": pulumi.String("resource[\"k8s.pod.uid\"]"),
238+
},
239+
},
240+
},
241+
},
242+
"processors": pulumi.Map{
243+
"batch": pulumi.Map{},
244+
"k8sattributes": pulumi.Map{
245+
"auth_type": pulumi.String("serviceAccount"),
246+
"passthrough": pulumi.Bool(false),
247+
"filter": pulumi.Map{
248+
"node_from_env_var": pulumi.String("KUBERNETES_NODE_NAME"),
249+
},
250+
"extract": pulumi.Map{
251+
"metadata": pulumi.StringArray{
252+
pulumi.String("k8s.pod.name"),
253+
pulumi.String("k8s.pod.uid"),
254+
pulumi.String("k8s.deployment.name"),
255+
pulumi.String("k8s.namespace.name"),
256+
pulumi.String("k8s.node.name"),
257+
pulumi.String("k8s.pod.start_time"),
258+
pulumi.String("k8s.cluster.uid"),
259+
},
260+
"labels": pulumi.Array{
261+
pulumi.Map{
262+
"tag_name": pulumi.String("app"),
263+
"key": pulumi.String("app"),
264+
"from": pulumi.String("pod"),
265+
},
266+
},
267+
},
268+
"pod_association": pulumi.Array{
269+
pulumi.Map{
270+
"sources": pulumi.Array{
271+
pulumi.Map{
272+
"from": pulumi.String("resource_attribute"),
273+
"name": pulumi.String("k8s.pod.name"),
274+
},
275+
pulumi.Map{
276+
"from": pulumi.String("resource_attribute"),
277+
"name": pulumi.String("k8s.namespace.name"),
278+
},
279+
},
280+
},
281+
},
282+
},
283+
},
284+
"exporters": pulumi.Map{
285+
"otlphttp/victorialogs": pulumi.Map{
286+
"logs_endpoint": pulumi.String("http://victoria-logs-victoria-logs-single-server:9428/insert/opentelemetry/v1/logs"),
287+
"headers": pulumi.Map{
288+
"VL-Msg-Field": pulumi.String("body"),
289+
"VL-Time-Field": pulumi.String("timestamp"),
290+
"VL-Stream-Fields": pulumi.String("k8s.namespace.name,k8s.pod.name,k8s.container.name,log.iostream"),
291+
},
292+
"timeout": pulumi.String("10s"),
293+
"retry_on_failure": pulumi.Map{
294+
"enabled": pulumi.Bool(true),
295+
"initial_interval": pulumi.String("5s"),
296+
"max_interval": pulumi.String("30s"),
297+
"max_elapsed_time": pulumi.String("300s"),
298+
},
299+
"sending_queue": pulumi.Map{
300+
"enabled": pulumi.Bool(true),
301+
"num_consumers": pulumi.Int(10),
302+
"queue_size": pulumi.Int(50),
303+
},
304+
},
305+
},
306+
"service": pulumi.Map{
307+
"pipelines": pulumi.Map{
308+
"logs": pulumi.Map{
309+
"receivers": pulumi.StringArray{pulumi.String("filelog")},
310+
"processors": pulumi.StringArray{pulumi.String("batch"), pulumi.String("k8sattributes")},
311+
"exporters": pulumi.StringArray{pulumi.String("otlphttp/victorialogs")},
312+
},
313+
},
314+
},
315+
},
316+
"extraVolumes": pulumi.Array{
317+
pulumi.Map{
318+
"name": pulumi.String("varlogpods"),
319+
"hostPath": pulumi.Map{
320+
"path": pulumi.String("/var/log/pods"),
321+
},
322+
},
323+
pulumi.Map{
324+
"name": pulumi.String("varlibdockercontainers"),
325+
"hostPath": pulumi.Map{
326+
"path": pulumi.String("/var/lib/docker/containers"),
327+
},
328+
},
329+
},
330+
"extraVolumeMounts": pulumi.Array{
331+
pulumi.Map{
332+
"name": pulumi.String("varlogpods"),
333+
"mountPath": pulumi.String("/var/log/pods"),
334+
"readOnly": pulumi.Bool(true),
335+
},
336+
pulumi.Map{
337+
"name": pulumi.String("varlibdockercontainers"),
338+
"mountPath": pulumi.String("/var/lib/docker/containers"),
339+
"readOnly": pulumi.Bool(true),
340+
},
341+
},
342+
"extraEnvs": pulumi.Array{
343+
pulumi.Map{
344+
"name": pulumi.String("KUBERNETES_NODE_NAME"),
345+
"valueFrom": pulumi.Map{
346+
"fieldRef": pulumi.Map{
347+
"fieldPath": pulumi.String("spec.nodeName"),
348+
},
349+
},
350+
},
351+
},
352+
"resources": pulumi.Map{
353+
"requests": pulumi.Map{
354+
"memory": pulumi.String("200Mi"),
355+
"cpu": pulumi.String("100m"),
356+
},
357+
"limits": pulumi.Map{
358+
"memory": pulumi.String("400Mi"),
359+
"cpu": pulumi.String("200m"),
360+
},
361+
},
362+
"tolerations": pulumi.Array{
363+
pulumi.Map{
364+
"key": pulumi.String("node-role.kubernetes.io/master"),
365+
"operator": pulumi.String("Exists"),
366+
"effect": pulumi.String("NoSchedule"),
367+
},
368+
pulumi.Map{
369+
"key": pulumi.String("node-role.kubernetes.io/control-plane"),
370+
"operator": pulumi.String("Exists"),
371+
"effect": pulumi.String("NoSchedule"),
372+
},
373+
},
374+
},
375+
}, pulumi.Provider(cluster.Provider))
376+
if err != nil {
377+
return err
378+
}
379+
380+
return nil
381+
}
382+
112383
func deployGrafana(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *corev1.Namespace, environment string, ingressNginx *helm.Chart) error {
113384
conf := config.New(ctx, "mcp-registry")
114385
grafanaSecret, err := corev1.NewSecret(ctx, "grafana-secrets", &corev1.SecretArgs{
@@ -149,7 +420,7 @@ func deployGrafana(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *cor
149420
return err
150421
}
151422

152-
// Create VictoriaMetrics datasource
423+
// Create VictoriaMetrics and VictoriaLogs datasources
153424
datasourcesConfig := map[string]interface{}{
154425
"apiVersion": 1,
155426
"datasources": []map[string]interface{}{
@@ -160,6 +431,15 @@ func deployGrafana(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *cor
160431
"access": "proxy",
161432
"isDefault": true,
162433
},
434+
{
435+
"name": "VictoriaLogs",
436+
"type": "victoriametrics-logs-datasource",
437+
"url": "http://victoria-logs-victoria-logs-single-server:9428",
438+
"access": "proxy",
439+
"jsonData": map[string]interface{}{
440+
"maxLines": 1000,
441+
},
442+
},
163443
},
164444
}
165445

@@ -187,6 +467,9 @@ func deployGrafana(ctx *pulumi.Context, cluster *providers.ProviderInfo, ns *cor
187467
},
188468
Namespace: ns.Metadata.Name().Elem(),
189469
Values: pulumi.Map{
470+
"plugins": pulumi.Array{
471+
pulumi.String("victoriametrics-logs-datasource"),
472+
},
190473
"extraConfigmapMounts": pulumi.Array{
191474
pulumi.Map{
192475
"name": pulumi.String("grafana-datasources"),

0 commit comments

Comments
 (0)