Skip to content

Commit 732e1ad

Browse files
authored
feat: add troubleshoot_kubernetes_list_count_pods_per_cluster tool (#40)
1 parent aba00f8 commit 732e1ad

File tree

5 files changed

+214
-0
lines changed

5 files changed

+214
-0
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ The handler filters tools dynamically based on `GetMyPermissions` from Sysdig Se
5858
| `troubleshoot_kubernetes_list_top_restarted_pods` | `tool_troubleshoot_kubernetes_list_top_restarted_pods.go` | Lists the pods with the highest number of container restarts. | `promql.exec` | "Show the top 10 pods with the most container restarts in cluster 'production'" |
5959
| `troubleshoot_kubernetes_list_top_400_500_http_errors_in_pods` | `tool_troubleshoot_kubernetes_list_top_400_500_http_errors_in_pods.go` | Lists the pods with the highest rate of HTTP 4xx and 5xx errors over a specified time interval. | `promql.exec` | "Show the top 20 pods with the most HTTP errors in cluster 'production'" |
6060
| `troubleshoot_kubernetes_list_top_network_errors_in_pods` | `tool_troubleshoot_kubernetes_list_top_network_errors_in_pods.go` | Shows the top network errors by pod over a given interval. | `promql.exec` | "Show the top 10 pods with the most network errors in cluster 'production'" |
61+
| `troubleshoot_kubernetes_list_count_pods_per_cluster` | `tool_troubleshoot_kubernetes_list_count_pods_per_cluster.go` | List the count of running Kubernetes Pods grouped by cluster and namespace. | `promql.exec` | "List the count of running Kubernetes Pods in cluster 'production'" |
6162

6263
Every tool has a companion `_test.go` file that exercises request validation, permission metadata, and Sysdig client calls through mocks.
6364
Note that if you add more tools you need to also update this file to reflect that.

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ The server dynamically filters the available tools based on the permissions asso
153153
- **Required Permission**: `promql.exec`
154154
- **Sample Prompt**: "Show the top 10 pods with the most network errors in cluster 'production'"
155155

156+
- **`troubleshoot_kubernetes_list_count_pods_per_cluster`**
157+
- **Description**: List the count of running Kubernetes Pods grouped by cluster and namespace.
158+
- **Required Permission**: `promql.exec`
159+
- **Sample Prompt**: "List the count of running Kubernetes Pods in cluster 'production'"
160+
156161
## Requirements
157162

158163
- [Go](https://go.dev/doc/install) 1.25 or higher (if running without Docker).

cmd/server/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
104104
tools.NewTroubleshootKubernetesListTopRestartedPods(sysdigClient),
105105
tools.NewTroubleshootKubernetesListTop400500HttpErrorsInPods(sysdigClient),
106106
tools.NewTroubleshootKubernetesListTopNetworkErrorsInPods(sysdigClient),
107+
tools.NewTroubleshootKubernetesListCountPodsPerCluster(sysdigClient),
107108
)
108109
return handler
109110
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"strings"
9+
10+
"github.com/mark3labs/mcp-go/mcp"
11+
"github.com/mark3labs/mcp-go/server"
12+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
13+
)
14+
15+
type TroubleshootKubernetesListCountPodsPerCluster struct {
16+
SysdigClient sysdig.ExtendedClientWithResponsesInterface
17+
}
18+
19+
func NewTroubleshootKubernetesListCountPodsPerCluster(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *TroubleshootKubernetesListCountPodsPerCluster {
20+
return &TroubleshootKubernetesListCountPodsPerCluster{
21+
SysdigClient: sysdigClient,
22+
}
23+
}
24+
25+
func (t *TroubleshootKubernetesListCountPodsPerCluster) RegisterInServer(s *server.MCPServer) {
26+
tool := mcp.NewTool("troubleshoot_kubernetes_list_count_pods_per_cluster",
27+
mcp.WithDescription("List the count of running Kubernetes Pods grouped by cluster and namespace."),
28+
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
29+
mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
30+
mcp.WithNumber("limit",
31+
mcp.Description("Maximum number of results to return."),
32+
mcp.DefaultNumber(20),
33+
),
34+
mcp.WithOutputSchema[map[string]any](),
35+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
36+
)
37+
s.AddTool(tool, t.handle)
38+
}
39+
40+
func (t *TroubleshootKubernetesListCountPodsPerCluster) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
41+
clusterName := mcp.ParseString(request, "cluster_name", "")
42+
namespaceName := mcp.ParseString(request, "namespace_name", "")
43+
limit := mcp.ParseInt(request, "limit", 20)
44+
45+
query := buildKubePodCountQuery(clusterName, namespaceName)
46+
47+
limitQuery := sysdig.LimitQuery(limit)
48+
params := &sysdig.GetQueryV1Params{
49+
Query: query,
50+
Limit: &limitQuery,
51+
}
52+
53+
httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
54+
if err != nil {
55+
return mcp.NewToolResultErrorFromErr("failed to get pod count", err), nil
56+
}
57+
58+
if httpResp.StatusCode != 200 {
59+
bodyBytes, _ := io.ReadAll(httpResp.Body)
60+
return mcp.NewToolResultErrorf("failed to get pod count: status code %d, body: %s", httpResp.StatusCode, string(bodyBytes)), nil
61+
}
62+
63+
var queryResponse sysdig.QueryResponseV1
64+
if err := json.NewDecoder(httpResp.Body).Decode(&queryResponse); err != nil {
65+
return mcp.NewToolResultErrorFromErr("failed to decode response", err), nil
66+
}
67+
68+
return mcp.NewToolResultJSON(queryResponse)
69+
}
70+
71+
// buildKubePodCountQuery returns the PromQL expression that sums kube_pod_info
// by kube_cluster_name and kube_namespace_name.
//
// A non-empty clusterName and/or namespaceName adds an equality matcher on the
// corresponding label; empty values add no matcher. Values are written with
// %q so that quotes, backslashes and control characters in caller-supplied
// input are escaped instead of terminating the string literal and altering
// the query.
func buildKubePodCountQuery(clusterName, namespaceName string) string {
	var filters []string
	if clusterName != "" {
		filters = append(filters, fmt.Sprintf("kube_cluster_name=%q", clusterName))
	}
	if namespaceName != "" {
		filters = append(filters, fmt.Sprintf("kube_namespace_name=%q", namespaceName))
	}

	filterString := ""
	if len(filters) > 0 {
		filterString = fmt.Sprintf("{%s}", strings.Join(filters, ","))
	}

	return fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info%s)", filterString)
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package tools_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"io"
7+
"net/http"
8+
9+
"github.com/mark3labs/mcp-go/mcp"
10+
"github.com/mark3labs/mcp-go/server"
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
14+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
15+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
16+
"go.uber.org/mock/gomock"
17+
)
18+
19+
var _ = Describe("TroubleshootKubernetesListCountPodsPerCluster Tool", func() {
20+
var (
21+
tool *tools.TroubleshootKubernetesListCountPodsPerCluster
22+
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
23+
mcpServer *server.MCPServer
24+
ctrl *gomock.Controller
25+
)
26+
27+
BeforeEach(func() {
28+
ctrl = gomock.NewController(GinkgoT())
29+
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
30+
tool = tools.NewTroubleshootKubernetesListCountPodsPerCluster(mockSysdig)
31+
mcpServer = server.NewMCPServer("test", "test")
32+
tool.RegisterInServer(mcpServer)
33+
})
34+
35+
It("should register successfully in the server", func() {
36+
Expect(mcpServer.GetTool("troubleshoot_kubernetes_list_count_pods_per_cluster")).NotTo(BeNil())
37+
})
38+
39+
When("counting pods", func() {
40+
DescribeTable("it succeeds", func(ctx context.Context, toolName string, request mcp.CallToolRequest, expectedParamsRequested sysdig.GetQueryV1Params) {
41+
mockSysdig.EXPECT().GetQueryV1(gomock.Any(), &expectedParamsRequested).Return(&http.Response{
42+
StatusCode: http.StatusOK,
43+
Body: io.NopCloser(bytes.NewBufferString(`{"status":"success"}`)),
44+
}, nil)
45+
46+
serverTool := mcpServer.GetTool(toolName)
47+
result, err := serverTool.Handler(ctx, request)
48+
Expect(err).NotTo(HaveOccurred())
49+
50+
resultData, ok := result.Content[0].(mcp.TextContent)
51+
Expect(ok).To(BeTrue())
52+
Expect(resultData.Text).To(MatchJSON(`{"status":"success"}`))
53+
},
54+
Entry(nil,
55+
"troubleshoot_kubernetes_list_count_pods_per_cluster",
56+
mcp.CallToolRequest{
57+
Params: mcp.CallToolParams{
58+
Name: "troubleshoot_kubernetes_list_count_pods_per_cluster",
59+
Arguments: map[string]any{},
60+
},
61+
},
62+
sysdig.GetQueryV1Params{
63+
Query: `sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info)`,
64+
Limit: asPtr(sysdig.LimitQuery(20)),
65+
},
66+
),
67+
Entry(nil,
68+
"troubleshoot_kubernetes_list_count_pods_per_cluster",
69+
mcp.CallToolRequest{
70+
Params: mcp.CallToolParams{
71+
Name: "troubleshoot_kubernetes_list_count_pods_per_cluster",
72+
Arguments: map[string]any{"limit": "10"},
73+
},
74+
},
75+
sysdig.GetQueryV1Params{
76+
Query: `sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info)`,
77+
Limit: asPtr(sysdig.LimitQuery(10)),
78+
},
79+
),
80+
Entry(nil,
81+
"troubleshoot_kubernetes_list_count_pods_per_cluster",
82+
mcp.CallToolRequest{
83+
Params: mcp.CallToolParams{
84+
Name: "troubleshoot_kubernetes_list_count_pods_per_cluster",
85+
Arguments: map[string]any{"cluster_name": "my_cluster"},
86+
},
87+
},
88+
sysdig.GetQueryV1Params{
89+
Query: `sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info{kube_cluster_name="my_cluster"})`,
90+
Limit: asPtr(sysdig.LimitQuery(20)),
91+
},
92+
),
93+
Entry(nil,
94+
"troubleshoot_kubernetes_list_count_pods_per_cluster",
95+
mcp.CallToolRequest{
96+
Params: mcp.CallToolParams{
97+
Name: "troubleshoot_kubernetes_list_count_pods_per_cluster",
98+
Arguments: map[string]any{"namespace_name": "my_namespace"},
99+
},
100+
},
101+
sysdig.GetQueryV1Params{
102+
Query: `sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info{kube_namespace_name="my_namespace"})`,
103+
Limit: asPtr(sysdig.LimitQuery(20)),
104+
},
105+
),
106+
Entry(nil,
107+
"troubleshoot_kubernetes_list_count_pods_per_cluster",
108+
mcp.CallToolRequest{
109+
Params: mcp.CallToolParams{
110+
Name: "troubleshoot_kubernetes_list_count_pods_per_cluster",
111+
Arguments: map[string]any{"cluster_name": "my_cluster", "namespace_name": "my_namespace"},
112+
},
113+
},
114+
sysdig.GetQueryV1Params{
115+
Query: `sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace"})`,
116+
Limit: asPtr(sysdig.LimitQuery(20)),
117+
},
118+
),
119+
)
120+
})
121+
})

0 commit comments

Comments
 (0)