Skip to content

Commit 0626a29

Browse files
authored
feat: add troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota (#41)
1 parent 732e1ad commit 0626a29

File tree

5 files changed

+214
-0
lines changed

5 files changed

+214
-0
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ The handler filters tools dynamically based on `GetMyPermissions` from Sysdig Se
5959
| `troubleshoot_kubernetes_list_top_400_500_http_errors_in_pods` | `tool_troubleshoot_kubernetes_list_top_400_500_http_errors_in_pods.go` | Lists the pods with the highest rate of HTTP 4xx and 5xx errors over a specified time interval. | `promql.exec` | "Show the top 20 pods with the most HTTP errors in cluster 'production'" |
6060
| `troubleshoot_kubernetes_list_top_network_errors_in_pods` | `tool_troubleshoot_kubernetes_list_top_network_errors_in_pods.go` | Shows the top network errors by pod over a given interval. | `promql.exec` | "Show the top 10 pods with the most network errors in cluster 'production'" |
6161
| `troubleshoot_kubernetes_list_count_pods_per_cluster` | `tool_troubleshoot_kubernetes_list_count_pods_per_cluster.go` | List the count of running Kubernetes Pods grouped by cluster and namespace. | `promql.exec` | "List the count of running Kubernetes Pods in cluster 'production'" |
62+
| `troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota` | `tool_troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota.go` | List Kubernetes pods with CPU usage below 25% of the quota limit. | `promql.exec` | "Show the top 10 underutilized pods by CPU quota in cluster 'production'" |
6263

6364
Every tool has a companion `_test.go` file that exercises request validation, permission metadata, and Sysdig client calls through mocks.
6465
Note that if you add more tools you need to also update this file to reflect that.

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,11 @@ The server dynamically filters the available tools based on the permissions asso
158158
- **Required Permission**: `promql.exec`
159159
- **Sample Prompt**: "List the count of running Kubernetes Pods in cluster 'production'"
160160

161+
- **`troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota`**
162+
- **Description**: List Kubernetes pods with CPU usage below 25% of the quota limit.
163+
- **Required Permission**: `promql.exec`
164+
- **Sample Prompt**: "Show the top 10 underutilized pods by CPU quota in cluster 'production'"
165+
161166
## Requirements
162167

163168
- [Go](https://go.dev/doc/install) 1.25 or higher (if running without Docker).

cmd/server/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
105105
tools.NewTroubleshootKubernetesListTop400500HttpErrorsInPods(sysdigClient),
106106
tools.NewTroubleshootKubernetesListTopNetworkErrorsInPods(sysdigClient),
107107
tools.NewTroubleshootKubernetesListCountPodsPerCluster(sysdigClient),
108+
tools.NewTroubleshootKubernetesListUnderutilizedPodsByCPUQuota(sysdigClient),
108109
)
109110
return handler
110111
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"strings"
9+
10+
"github.com/mark3labs/mcp-go/mcp"
11+
"github.com/mark3labs/mcp-go/server"
12+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
13+
)
14+
15+
type TroubleshootKubernetesListUnderutilizedPodsByCPUQuota struct {
16+
SysdigClient sysdig.ExtendedClientWithResponsesInterface
17+
}
18+
19+
func NewTroubleshootKubernetesListUnderutilizedPodsByCPUQuota(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *TroubleshootKubernetesListUnderutilizedPodsByCPUQuota {
20+
return &TroubleshootKubernetesListUnderutilizedPodsByCPUQuota{
21+
SysdigClient: sysdigClient,
22+
}
23+
}
24+
25+
func (t *TroubleshootKubernetesListUnderutilizedPodsByCPUQuota) RegisterInServer(s *server.MCPServer) {
26+
tool := mcp.NewTool("troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
27+
mcp.WithDescription("List Kubernetes pods with CPU usage below 25% of the quota limit."),
28+
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
29+
mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
30+
mcp.WithNumber("limit",
31+
mcp.Description("Maximum number of pods to return."),
32+
mcp.DefaultNumber(10),
33+
),
34+
mcp.WithOutputSchema[map[string]any](),
35+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
36+
)
37+
s.AddTool(tool, t.handle)
38+
}
39+
40+
func (t *TroubleshootKubernetesListUnderutilizedPodsByCPUQuota) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
41+
clusterName := mcp.ParseString(request, "cluster_name", "")
42+
namespaceName := mcp.ParseString(request, "namespace_name", "")
43+
limit := mcp.ParseInt(request, "limit", 10)
44+
45+
query := buildUnderutilizedPodsQuery(clusterName, namespaceName)
46+
47+
limitQuery := sysdig.LimitQuery(limit)
48+
params := &sysdig.GetQueryV1Params{
49+
Query: query,
50+
Limit: &limitQuery,
51+
}
52+
53+
httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
54+
if err != nil {
55+
return mcp.NewToolResultErrorFromErr("failed to get underutilized pod list", err), nil
56+
}
57+
58+
if httpResp.StatusCode != 200 {
59+
bodyBytes, _ := io.ReadAll(httpResp.Body)
60+
return mcp.NewToolResultErrorf("failed to get underutilized pod list: status code %d, body: %s", httpResp.StatusCode, string(bodyBytes)), nil
61+
}
62+
63+
var queryResponse sysdig.QueryResponseV1
64+
if err := json.NewDecoder(httpResp.Body).Decode(&queryResponse); err != nil {
65+
return mcp.NewToolResultErrorFromErr("failed to decode response", err), nil
66+
}
67+
68+
return mcp.NewToolResultJSON(queryResponse)
69+
}
70+
71+
// buildUnderutilizedPodsQuery returns the PromQL expression that selects pods
// whose summed CPU cores used, divided by their summed CPU quota limit, is
// below 0.25. Pods whose summed quota limit is not greater than zero are
// excluded by the `> 0` filter on the divisor.
//
// clusterName and namespaceName are optional; when non-empty each one adds an
// equality matcher to both metric selectors. The values are written with %q so
// that quotes and backslashes in caller-supplied names are escaped instead of
// terminating the label string and altering the query.
func buildUnderutilizedPodsQuery(clusterName, namespaceName string) string {
	filters := make([]string, 0, 2)
	if clusterName != "" {
		filters = append(filters, fmt.Sprintf("kube_cluster_name=%q", clusterName))
	}
	if namespaceName != "" {
		filters = append(filters, fmt.Sprintf("kube_namespace_name=%q", namespaceName))
	}

	// With no filters the selector stays empty and the bare metric name is used.
	var selector string
	if len(filters) > 0 {
		selector = "{" + strings.Join(filters, ",") + "}"
	}

	return fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used%s) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit%s) > 0) < 0.25", selector, selector)
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package tools_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"io"
7+
"net/http"
8+
9+
"github.com/mark3labs/mcp-go/mcp"
10+
"github.com/mark3labs/mcp-go/server"
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
14+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
15+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
16+
"go.uber.org/mock/gomock"
17+
)
18+
19+
var _ = Describe("TroubleshootKubernetesListUnderutilizedPodsByCPUQuota Tool", func() {
20+
var (
21+
tool *tools.TroubleshootKubernetesListUnderutilizedPodsByCPUQuota
22+
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
23+
mcpServer *server.MCPServer
24+
ctrl *gomock.Controller
25+
)
26+
27+
BeforeEach(func() {
28+
ctrl = gomock.NewController(GinkgoT())
29+
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
30+
tool = tools.NewTroubleshootKubernetesListUnderutilizedPodsByCPUQuota(mockSysdig)
31+
mcpServer = server.NewMCPServer("test", "test")
32+
tool.RegisterInServer(mcpServer)
33+
})
34+
35+
It("should register successfully in the server", func() {
36+
Expect(mcpServer.GetTool("troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota")).NotTo(BeNil())
37+
})
38+
39+
When("listing underutilized pods", func() {
40+
DescribeTable("it succeeds", func(ctx context.Context, toolName string, request mcp.CallToolRequest, expectedParamsRequested sysdig.GetQueryV1Params) {
41+
mockSysdig.EXPECT().GetQueryV1(gomock.Any(), &expectedParamsRequested).Return(&http.Response{
42+
StatusCode: http.StatusOK,
43+
Body: io.NopCloser(bytes.NewBufferString(`{"status":"success"}`)),
44+
}, nil)
45+
46+
serverTool := mcpServer.GetTool(toolName)
47+
result, err := serverTool.Handler(ctx, request)
48+
Expect(err).NotTo(HaveOccurred())
49+
50+
resultData, ok := result.Content[0].(mcp.TextContent)
51+
Expect(ok).To(BeTrue())
52+
Expect(resultData.Text).To(MatchJSON(`{"status":"success"}`))
53+
},
54+
Entry(nil,
55+
"troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
56+
mcp.CallToolRequest{
57+
Params: mcp.CallToolParams{
58+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
59+
Arguments: map[string]any{},
60+
},
61+
},
62+
sysdig.GetQueryV1Params{
63+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit) > 0) < 0.25`,
64+
Limit: asPtr(sysdig.LimitQuery(10)),
65+
},
66+
),
67+
Entry(nil,
68+
"troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
69+
mcp.CallToolRequest{
70+
Params: mcp.CallToolParams{
71+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
72+
Arguments: map[string]any{"limit": "20"},
73+
},
74+
},
75+
sysdig.GetQueryV1Params{
76+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit) > 0) < 0.25`,
77+
Limit: asPtr(sysdig.LimitQuery(20)),
78+
},
79+
),
80+
Entry(nil,
81+
"troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
82+
mcp.CallToolRequest{
83+
Params: mcp.CallToolParams{
84+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
85+
Arguments: map[string]any{"cluster_name": "my_cluster"},
86+
},
87+
},
88+
sysdig.GetQueryV1Params{
89+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used{kube_cluster_name="my_cluster"}) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit{kube_cluster_name="my_cluster"}) > 0) < 0.25`,
90+
Limit: asPtr(sysdig.LimitQuery(10)),
91+
},
92+
),
93+
Entry(nil,
94+
"troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
95+
mcp.CallToolRequest{
96+
Params: mcp.CallToolParams{
97+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
98+
Arguments: map[string]any{"namespace_name": "my_namespace"},
99+
},
100+
},
101+
sysdig.GetQueryV1Params{
102+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used{kube_namespace_name="my_namespace"}) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit{kube_namespace_name="my_namespace"}) > 0) < 0.25`,
103+
Limit: asPtr(sysdig.LimitQuery(10)),
104+
},
105+
),
106+
Entry(nil,
107+
"troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
108+
mcp.CallToolRequest{
109+
Params: mcp.CallToolParams{
110+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota",
111+
Arguments: map[string]any{"cluster_name": "my_cluster", "namespace_name": "my_namespace"},
112+
},
113+
},
114+
sysdig.GetQueryV1Params{
115+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_used{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace"}) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_cpu_cores_quota_limit{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace"}) > 0) < 0.25`,
116+
Limit: asPtr(sysdig.LimitQuery(10)),
117+
},
118+
),
119+
)
120+
})
121+
})

0 commit comments

Comments
 (0)