Skip to content

Commit 453a514

Browse files
Enable Kubernetes MCP Server to get PSI metrics
1 parent e86686a commit 453a514

File tree

9 files changed

+307
-0
lines changed

9 files changed

+307
-0
lines changed

pkg/kubernetes/accesscontrol_clientset.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,22 @@ func (a *AccessControlClientset) NodesLogs(ctx context.Context, name, logPath st
5555
AbsPath(url...), nil
5656
}
5757

58+
func (a *AccessControlClientset) NodesStatsSummary(ctx context.Context, name string) (*rest.Request, error) {
59+
gvk := &schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Node"}
60+
if !isAllowed(a.staticConfig, gvk) {
61+
return nil, isNotAllowedError(gvk)
62+
}
63+
64+
if _, err := a.delegate.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{}); err != nil {
65+
return nil, fmt.Errorf("failed to get node %s: %w", name, err)
66+
}
67+
68+
url := []string{"api", "v1", "nodes", name, "proxy", "stats", "summary"}
69+
return a.delegate.CoreV1().RESTClient().
70+
Get().
71+
AbsPath(url...), nil
72+
}
73+
5874
func (a *AccessControlClientset) Pods(namespace string) (corev1.PodInterface, error) {
5975
gvk := &schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"}
6076
if !isAllowed(a.staticConfig, gvk) {

pkg/kubernetes/nodes.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,25 @@ func (k *Kubernetes) NodesLog(ctx context.Context, name string, logPath string,
3434

3535
return string(rawData), nil
3636
}
37+
38+
func (k *Kubernetes) NodesStatsSummary(ctx context.Context, name string) (string, error) {
39+
// Use the node proxy API to access stats summary from the kubelet
40+
// This endpoint provides CPU, memory, filesystem, and network statistics
41+
42+
req, err := k.AccessControlClientset().NodesStatsSummary(ctx, name)
43+
if err != nil {
44+
return "", err
45+
}
46+
47+
result := req.Do(ctx)
48+
if result.Error() != nil {
49+
return "", fmt.Errorf("failed to get node stats summary: %w", result.Error())
50+
}
51+
52+
rawData, err := result.Raw()
53+
if err != nil {
54+
return "", fmt.Errorf("failed to read node stats summary response: %w", err)
55+
}
56+
57+
return string(rawData), nil
58+
}

pkg/mcp/nodes_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,115 @@ func (s *NodesSuite) TestNodesLogDenied() {
200200
})
201201
}
202202

203+
func (s *NodesSuite) TestNodesStatsSummary() {
204+
s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
205+
// Get Node response
206+
if req.URL.Path == "/api/v1/nodes/existing-node" {
207+
w.Header().Set("Content-Type", "application/json")
208+
w.WriteHeader(http.StatusOK)
209+
_, _ = w.Write([]byte(`{
210+
"apiVersion": "v1",
211+
"kind": "Node",
212+
"metadata": {
213+
"name": "existing-node"
214+
}
215+
}`))
216+
return
217+
}
218+
// Get Stats Summary response
219+
if req.URL.Path == "/api/v1/nodes/existing-node/proxy/stats/summary" {
220+
w.Header().Set("Content-Type", "application/json")
221+
w.WriteHeader(http.StatusOK)
222+
_, _ = w.Write([]byte(`{
223+
"node": {
224+
"nodeName": "existing-node",
225+
"cpu": {
226+
"time": "2025-10-27T00:00:00Z",
227+
"usageNanoCores": 1000000000,
228+
"usageCoreNanoSeconds": 5000000000
229+
},
230+
"memory": {
231+
"time": "2025-10-27T00:00:00Z",
232+
"availableBytes": 8000000000,
233+
"usageBytes": 4000000000,
234+
"workingSetBytes": 3500000000
235+
}
236+
},
237+
"pods": []
238+
}`))
239+
return
240+
}
241+
w.WriteHeader(http.StatusNotFound)
242+
}))
243+
s.InitMcpClient()
244+
s.Run("nodes_stats_summary(name=nil)", func() {
245+
toolResult, err := s.CallTool("nodes_stats_summary", map[string]interface{}{})
246+
s.Require().NotNil(toolResult, "toolResult should not be nil")
247+
s.Run("has error", func() {
248+
s.Truef(toolResult.IsError, "call tool should fail")
249+
s.Nilf(err, "call tool should not return error object")
250+
})
251+
s.Run("describes missing name", func() {
252+
expectedMessage := "failed to get node stats summary, missing argument name"
253+
s.Equalf(expectedMessage, toolResult.Content[0].(mcp.TextContent).Text,
254+
"expected descriptive error '%s', got %v", expectedMessage, toolResult.Content[0].(mcp.TextContent).Text)
255+
})
256+
})
257+
s.Run("nodes_stats_summary(name=inexistent-node)", func() {
258+
toolResult, err := s.CallTool("nodes_stats_summary", map[string]interface{}{
259+
"name": "inexistent-node",
260+
})
261+
s.Require().NotNil(toolResult, "toolResult should not be nil")
262+
s.Run("has error", func() {
263+
s.Truef(toolResult.IsError, "call tool should fail")
264+
s.Nilf(err, "call tool should not return error object")
265+
})
266+
s.Run("describes missing node", func() {
267+
expectedMessage := "failed to get node stats summary for inexistent-node: failed to get node inexistent-node: the server could not find the requested resource (get nodes inexistent-node)"
268+
s.Equalf(expectedMessage, toolResult.Content[0].(mcp.TextContent).Text,
269+
"expected descriptive error '%s', got %v", expectedMessage, toolResult.Content[0].(mcp.TextContent).Text)
270+
})
271+
})
272+
s.Run("nodes_stats_summary(name=existing-node)", func() {
273+
toolResult, err := s.CallTool("nodes_stats_summary", map[string]interface{}{
274+
"name": "existing-node",
275+
})
276+
s.Require().NotNil(toolResult, "toolResult should not be nil")
277+
s.Run("no error", func() {
278+
s.Falsef(toolResult.IsError, "call tool should succeed")
279+
s.Nilf(err, "call tool should not return error object")
280+
})
281+
s.Run("returns stats summary", func() {
282+
content := toolResult.Content[0].(mcp.TextContent).Text
283+
s.Containsf(content, "existing-node", "expected stats to contain node name, got %v", content)
284+
s.Containsf(content, "usageNanoCores", "expected stats to contain CPU metrics, got %v", content)
285+
s.Containsf(content, "usageBytes", "expected stats to contain memory metrics, got %v", content)
286+
})
287+
})
288+
}
289+
290+
func (s *NodesSuite) TestNodesStatsSummaryDenied() {
291+
s.Require().NoError(toml.Unmarshal([]byte(`
292+
denied_resources = [ { version = "v1", kind = "Node" } ]
293+
`), s.Cfg), "Expected to parse denied resources config")
294+
s.InitMcpClient()
295+
s.Run("nodes_stats_summary (denied)", func() {
296+
toolResult, err := s.CallTool("nodes_stats_summary", map[string]interface{}{
297+
"name": "does-not-matter",
298+
})
299+
s.Require().NotNil(toolResult, "toolResult should not be nil")
300+
s.Run("has error", func() {
301+
s.Truef(toolResult.IsError, "call tool should fail")
302+
s.Nilf(err, "call tool should not return error object")
303+
})
304+
s.Run("describes denial", func() {
305+
expectedMessage := "failed to get node stats summary for does-not-matter: resource not allowed: /v1, Kind=Node"
306+
s.Equalf(expectedMessage, toolResult.Content[0].(mcp.TextContent).Text,
307+
"expected descriptive error '%s', got %v", expectedMessage, toolResult.Content[0].(mcp.TextContent).Text)
308+
})
309+
})
310+
}
311+
203312
func TestNodes(t *testing.T) {
204313
suite.Run(t, new(NodesSuite))
205314
}

pkg/mcp/testdata/toolsets-core-tools.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,29 @@
6767
},
6868
"name": "nodes_log"
6969
},
70+
{
71+
"annotations": {
72+
"title": "Node: Stats Summary",
73+
"readOnlyHint": true,
74+
"destructiveHint": false,
75+
"idempotentHint": false,
76+
"openWorldHint": true
77+
},
78+
"description": "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
79+
"inputSchema": {
80+
"type": "object",
81+
"properties": {
82+
"name": {
83+
"description": "Name of the node to get stats from",
84+
"type": "string"
85+
}
86+
},
87+
"required": [
88+
"name"
89+
]
90+
},
91+
"name": "nodes_stats_summary"
92+
},
7093
{
7194
"annotations": {
7295
"title": "Pods: Delete",

pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,37 @@
237237
},
238238
"name": "nodes_log"
239239
},
240+
{
241+
"annotations": {
242+
"title": "Node: Stats Summary",
243+
"readOnlyHint": true,
244+
"destructiveHint": false,
245+
"idempotentHint": false,
246+
"openWorldHint": true
247+
},
248+
"description": "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
249+
"inputSchema": {
250+
"type": "object",
251+
"properties": {
252+
"context": {
253+
"description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set",
254+
"enum": [
255+
"extra-cluster",
256+
"fake-context"
257+
],
258+
"type": "string"
259+
},
260+
"name": {
261+
"description": "Name of the node to get stats from",
262+
"type": "string"
263+
}
264+
},
265+
"required": [
266+
"name"
267+
]
268+
},
269+
"name": "nodes_stats_summary"
270+
},
240271
{
241272
"annotations": {
242273
"title": "Pods: Delete",

pkg/mcp/testdata/toolsets-full-tools-multicluster.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,33 @@
213213
},
214214
"name": "nodes_log"
215215
},
216+
{
217+
"annotations": {
218+
"title": "Node: Stats Summary",
219+
"readOnlyHint": true,
220+
"destructiveHint": false,
221+
"idempotentHint": false,
222+
"openWorldHint": true
223+
},
224+
"description": "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
225+
"inputSchema": {
226+
"type": "object",
227+
"properties": {
228+
"context": {
229+
"description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set",
230+
"type": "string"
231+
},
232+
"name": {
233+
"description": "Name of the node to get stats from",
234+
"type": "string"
235+
}
236+
},
237+
"required": [
238+
"name"
239+
]
240+
},
241+
"name": "nodes_stats_summary"
242+
},
216243
{
217244
"annotations": {
218245
"title": "Pods: Delete",

pkg/mcp/testdata/toolsets-full-tools-openshift.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,29 @@
173173
},
174174
"name": "nodes_log"
175175
},
176+
{
177+
"annotations": {
178+
"title": "Node: Stats Summary",
179+
"readOnlyHint": true,
180+
"destructiveHint": false,
181+
"idempotentHint": false,
182+
"openWorldHint": true
183+
},
184+
"description": "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
185+
"inputSchema": {
186+
"type": "object",
187+
"properties": {
188+
"name": {
189+
"description": "Name of the node to get stats from",
190+
"type": "string"
191+
}
192+
},
193+
"required": [
194+
"name"
195+
]
196+
},
197+
"name": "nodes_stats_summary"
198+
},
176199
{
177200
"annotations": {
178201
"title": "Pods: Delete",

pkg/mcp/testdata/toolsets-full-tools.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,29 @@
173173
},
174174
"name": "nodes_log"
175175
},
176+
{
177+
"annotations": {
178+
"title": "Node: Stats Summary",
179+
"readOnlyHint": true,
180+
"destructiveHint": false,
181+
"idempotentHint": false,
182+
"openWorldHint": true
183+
},
184+
"description": "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
185+
"inputSchema": {
186+
"type": "object",
187+
"properties": {
188+
"name": {
189+
"description": "Name of the node to get stats from",
190+
"type": "string"
191+
}
192+
},
193+
"required": [
194+
"name"
195+
]
196+
},
197+
"name": "nodes_stats_summary"
198+
},
176199
{
177200
"annotations": {
178201
"title": "Pods: Delete",

pkg/toolsets/core/nodes.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,27 @@ func initNodes() []api.ServerTool {
4444
OpenWorldHint: ptr.To(true),
4545
},
4646
}, Handler: nodesLog},
47+
{Tool: api.Tool{
48+
Name: "nodes_stats_summary",
49+
Description: "Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics",
50+
InputSchema: &jsonschema.Schema{
51+
Type: "object",
52+
Properties: map[string]*jsonschema.Schema{
53+
"name": {
54+
Type: "string",
55+
Description: "Name of the node to get stats from",
56+
},
57+
},
58+
Required: []string{"name"},
59+
},
60+
Annotations: api.ToolAnnotations{
61+
Title: "Node: Stats Summary",
62+
ReadOnlyHint: ptr.To(true),
63+
DestructiveHint: ptr.To(false),
64+
IdempotentHint: ptr.To(false),
65+
OpenWorldHint: ptr.To(true),
66+
},
67+
}, Handler: nodesStatsSummary},
4768
}
4869
}
4970

@@ -78,3 +99,15 @@ func nodesLog(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
7899
}
79100
return api.NewToolCallResult(ret, nil), nil
80101
}
102+
103+
func nodesStatsSummary(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
104+
name, ok := params.GetArguments()["name"].(string)
105+
if !ok || name == "" {
106+
return api.NewToolCallResult("", errors.New("failed to get node stats summary, missing argument name")), nil
107+
}
108+
ret, err := params.NodesStatsSummary(params, name)
109+
if err != nil {
110+
return api.NewToolCallResult("", fmt.Errorf("failed to get node stats summary for %s: %v", name, err)), nil
111+
}
112+
return api.NewToolCallResult(ret, nil), nil
113+
}

0 commit comments

Comments
 (0)