diff --git a/README.md b/README.md
index 303a426a..d4757348 100644
--- a/README.md
+++ b/README.md
@@ -213,6 +213,8 @@ The following sets of tools are available (all on by default):
config
+- **configuration_contexts_list** - List all available context names and associated server URLs from the kubeconfig file
+
- **configuration_view** - Get the current Kubernetes configuration content as a kubeconfig YAML
- `minified` (`boolean`) - Return a minified version of the configuration. If set to true, keeps only the current-context and the relevant pieces of the configuration for that context. If set to false, all contexts, clusters, auth-infos, and users are returned in the configuration. (Optional, default true)
@@ -294,6 +296,19 @@ The following sets of tools are available (all on by default):
- `name` (`string`) **(required)** - Name of the resource
- `namespace` (`string`) - Optional Namespace to delete the namespaced resource from (ignored in case of cluster scoped resources). If not provided, will delete resource from configured namespace
+- **plan_mustgather** - Plan for collecting a must-gather archive from an OpenShift cluster. must-gather is a tool for collecting cluster data used for debugging and troubleshooting, such as logs and Kubernetes resources
+ - `all_component_images` (`boolean`) - Optional. When enabled, collects and runs multiple must-gathers for all operators and components on the cluster that have an annotated must-gather image available
+ - `gather_command` (`string`) - Optionally specify a custom gather command to run a specialized script, e.g. /usr/bin/gather_audit_logs
+ - `host_network` (`boolean`) - Optionally run the must-gather pods in the host network of the node. This is only relevant if a specific gather image needs to capture host-level data
+ - `images` (`array`) - Optional list of images to use for gathering custom information about specific operators or cluster components. If not specified, OpenShift's default must-gather image will be used
+ - `keep_resources` (`boolean`) - Optional to retain all temporary resources when the must-gather completes; otherwise cleanup of the temporary resources that were created will be advised
+ - `namespace` (`string`) - Optional to specify an existing privileged namespace where must-gather pods should run. If not provided, a temporary namespace will be created
+ - `node_name` (`string`) - Optional node to run the must-gather pod. If not provided, a random control-plane node will be selected automatically
+ - `node_selector` (`string`) - Optional node label selector to use, only relevant when specifying a command and image which needs to capture data on a set of cluster nodes simultaneously
+ - `since` (`string`) - Optional to collect logs newer than a relative duration like 5s, 2m5s, or 3h6m10s. If unspecified, all available logs will be collected
+ - `source_dir` (`string`) - Optional to set a specific directory where the pod will copy gathered data from
+ - `timeout` (`string`) - Timeout for the gather process, e.g. 30s, 6m20s, or 2h10m30s
+
diff --git a/pkg/kubernetes/resources.go b/pkg/kubernetes/resources.go
index 1f559e12..d816ed3d 100644
--- a/pkg/kubernetes/resources.go
+++ b/pkg/kubernetes/resources.go
@@ -3,10 +3,11 @@ package kubernetes
import (
"context"
"fmt"
- "k8s.io/apimachinery/pkg/runtime"
"regexp"
"strings"
+ "k8s.io/apimachinery/pkg/runtime"
+
"github.com/containers/kubernetes-mcp-server/pkg/version"
authv1 "k8s.io/api/authorization/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -36,7 +37,7 @@ func (k *Kubernetes) ResourcesList(ctx context.Context, gvk *schema.GroupVersion
// Check if operation is allowed for all namespaces (applicable for namespaced resources)
isNamespaced, _ := k.isNamespaced(gvk)
- if isNamespaced && !k.canIUse(ctx, gvr, namespace, "list") && namespace == "" {
+ if isNamespaced && !k.CanIUse(ctx, gvr, namespace, "list") && namespace == "" {
namespace = k.manager.configuredNamespace()
}
if options.AsTable {
@@ -187,7 +188,7 @@ func (k *Kubernetes) supportsGroupVersion(groupVersion string) bool {
return true
}
-func (k *Kubernetes) canIUse(ctx context.Context, gvr *schema.GroupVersionResource, namespace, verb string) bool {
+func (k *Kubernetes) CanIUse(ctx context.Context, gvr *schema.GroupVersionResource, namespace, verb string) bool {
accessReviews, err := k.manager.accessControlClientSet.SelfSubjectAccessReviews()
if err != nil {
return false
diff --git a/pkg/toolsets/core/mustgather.go b/pkg/toolsets/core/mustgather.go
new file mode 100644
index 00000000..dfc17499
--- /dev/null
+++ b/pkg/toolsets/core/mustgather.go
@@ -0,0 +1,512 @@
+package core
+
+import (
+ "context"
+ "fmt"
+ "path"
+ "strings"
+ "time"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+ "github.com/google/jsonschema-go/jsonschema"
+ corev1 "k8s.io/api/core/v1"
+ rbacv1 "k8s.io/api/rbac/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "k8s.io/apimachinery/pkg/util/rand"
+ "k8s.io/utils/ptr"
+ "sigs.k8s.io/yaml"
+)
+
+const (
+ defaultGatherSourceDir = "/must-gather/"
+ defaultMustGatherImage = "registry.redhat.io/openshift4/ose-must-gather:latest"
+ defaultGatherCmd = "/usr/bin/gather"
+ mgAnnotation = "operators.openshift.io/must-gather-image"
+ maxConcurrentGathers = 8
+)
+
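+// initMustGatherPlan registers the plan_mustgather tool; it returns no tools when the target cluster is not OpenShift.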
+func initMustGatherPlan(o internalk8s.Openshift) []api.ServerTool {
+ // must-gather collection plan is only applicable to OpenShift clusters
+ if !o.IsOpenShift(context.Background()) {
+ return []api.ServerTool{}
+ }
+
+ return []api.ServerTool{{
+ Tool: api.Tool{
+ Name: "plan_mustgather",
+ Description: "Plan for collecting a must-gather archive from an OpenShift cluster, must-gather is a tool for collecting cluster data related to debugging and troubleshooting like logs, kubernetes resources, etc.",
+ InputSchema: &jsonschema.Schema{
+ Type: "object",
+ Properties: map[string]*jsonschema.Schema{
+ "node_name": {
+ Type: "string",
+ Description: "Optional node to run the mustgather pod. If not provided, a random control-plane node will be selected automatically",
+ },
+ "node_selector": {
+ Type: "string",
+ Description: "Optional node label selector to use, only relevant when specifying a command and image which needs to capture data on a set of cluster nodes simultaneously",
+ },
+ "host_network": {
+ Type: "boolean",
+ Description: "Optionally run the must-gather pods in the host network of the node. This is only relevant if a specific gather image needs to capture host-level data",
+ },
+ "gather_command": {
+ Type: "string",
+ Description: "Optionally specify a custom gather command to run a specialized script, eg. /usr/bin/gather_audit_logs",
+ Default: api.ToRawMessage("/usr/bin/gather"),
+ },
+ "all_component_images": {
+ Type: "boolean",
+ Description: "Optional when enabled, collects and runs multiple must gathers for all operators and components on the cluster that have an annotated must-gather image available",
+ },
+ "images": {
+ Type: "array",
+ Description: "Optional list of images to use for gathering custom information about specific operators or cluster components. If not specified, OpenShift's default must-gather image will be used by default",
+ Items: &jsonschema.Schema{
+ Type: "string",
+ },
+ },
+ "source_dir": {
+ Type: "string",
+ Description: "Optional to set a specific directory where the pod will copy gathered data from",
+ Default: api.ToRawMessage("/must-gather"),
+ },
+ "timeout": {
+ Type: "string",
+ Description: "Timeout of the gather process eg. 30s, 6m20s, or 2h10m30s",
+ },
+ "namespace": {
+ Type: "string",
+ Description: "Optional to specify an existing privileged namespace where must-gather pods should run. If not provided, a temporary namespace will be created",
+ },
+ "keep_resources": {
+ Type: "boolean",
+ Description: "Optional to retain all temporary resources when the mustgather completes, otherwise temporary resources created will be advised to be cleaned up",
+ },
+ "since": {
+ Type: "string",
+ Description: "Optional to collect logs newer than a relative duration like 5s, 2m5s, or 3h6m10s. If unspecified, all available logs will be collected",
+ },
+ },
+ },
+ Annotations: api.ToolAnnotations{
+ Title: "MustGather: Plan",
+ ReadOnlyHint: ptr.To(true),
+ DestructiveHint: ptr.To(false),
+ IdempotentHint: ptr.To(false),
+ OpenWorldHint: ptr.To(true),
+ },
+ },
+
+ Handler: mustGatherPlan,
+ }}
+}
+
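+// mustGatherPlan builds the YAML manifests for a must-gather run (namespace, service account,
+// cluster role binding, and gather pod) and returns them as a textual plan together with apply
+// and cleanup guidance.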
+func mustGatherPlan(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+ args := params.GetArguments()
+
+ var nodeName, sourceDir, namespace, gatherCmd, timeout, since string
+ var hostNetwork, keepResources, allImages bool
+ var images []string
+ var nodeSelector map[string]string
+
+ if args["node_name"] != nil {
+ nodeName = args["node_name"].(string)
+ }
+
+ if args["node_selector"] != nil {
+ nodeSelector = parseNodeSelector(args["node_selector"].(string))
+ }
+
+ if args["host_network"] != nil {
+ hostNetwork = args["host_network"].(bool)
+ }
+
+ sourceDir = defaultGatherSourceDir
+ if args["source_dir"] != nil {
+ sourceDir = path.Clean(args["source_dir"].(string))
+ }
+
+ namespace = fmt.Sprintf("openshift-must-gather-%s", rand.String(6))
+ if args["namespace"] != nil {
+ namespace = args["namespace"].(string)
+ }
+
+ if args["keep_resources"] != nil {
+ keepResources = args["keep_resources"].(bool)
+ }
+
+ gatherCmd = defaultGatherCmd
+ if args["gather_command"] != nil {
+ gatherCmd = args["gather_command"].(string)
+ }
+
+ if args["all_component_images"] != nil {
+ allImages = args["all_component_images"].(bool)
+ }
+
+ if args["images"] != nil {
+ if imagesArg, ok := args["images"].([]interface{}); ok {
+ for _, img := range imagesArg {
+ if imgStr, ok := img.(string); ok {
+ images = append(images, imgStr)
+ }
+ }
+ }
+ }
+
+ if allImages {
+ componentImages, err := getComponentImages(params)
+ if err != nil {
+ return api.NewToolCallResult("",
+ fmt.Errorf("failed to get operator images: %v", err),
+ ), nil
+ }
+
+ images = append(images, componentImages...)
+ }
+
+ if len(images) > maxConcurrentGathers {
+ return api.NewToolCallResult("",
+ fmt.Errorf("more than %d gather images are not supported", maxConcurrentGathers),
+ ), nil
+ }
+
+ if args["timeout"] != nil {
+ timeout = args["timeout"].(string)
+
+ _, err := time.ParseDuration(timeout)
+ if err != nil {
+ return api.NewToolCallResult("", fmt.Errorf("timeout duration is not valid")), nil
+ }
+
+ gatherCmd = fmt.Sprintf("/usr/bin/timeout %s %s", timeout, gatherCmd)
+ }
+
+ if args["since"] != nil {
+ since = args["since"].(string)
+
+ _, err := time.ParseDuration(since)
+ if err != nil {
+ return api.NewToolCallResult("", fmt.Errorf("since duration is not valid")), nil
+ }
+ }
+
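+ // Propagate the requested duration to the gather containers via the MUST_GATHER_SINCE
+ // environment variable, which OpenShift's default gather scripts honor.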
+ envVars := []corev1.EnvVar{}
+ if since != "" {
+ envVars = append(envVars, corev1.EnvVar{
+ Name: "MUST_GATHER_SINCE",
+ Value: since,
+ })
+ }
+
+ // template container for gather;
+ // if multiple images are added, multiple containers are spun up in the same pod
+ gatherContainerTemplate := corev1.Container{
+ Name: "gather",
+ Image: defaultMustGatherImage,
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Command: []string{gatherCmd},
+ Env: envVars,
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "must-gather-output",
+ MountPath: sourceDir,
+ },
+ },
+ }
+
+ var gatherContainers = []corev1.Container{
+ *gatherContainerTemplate.DeepCopy(),
+ }
+
+ if len(images) > 0 {
+ gatherContainers = make([]corev1.Container, len(images))
+ }
+
+ for i, image := range images {
+ gatherContainers[i] = *gatherContainerTemplate.DeepCopy()
+
+ // if more than one gather container is added,
+ // suffix the container name with a numeric id
+ if len(images) > 1 {
+ gatherContainers[i].Name = fmt.Sprintf("gather-%d", i+1)
+ }
+ gatherContainers[i].Image = image
+ }
+
+ serviceAccountName := "must-gather-collector"
+
+ pod := &corev1.Pod{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "Pod",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ // Avoiding generateName as resources_create_or_update fails without an explicit name.
+ Name: fmt.Sprintf("must-gather-%s", rand.String(6)),
+ Namespace: namespace,
+ },
+ Spec: corev1.PodSpec{
+ ServiceAccountName: serviceAccountName,
+ NodeName: nodeName,
+ PriorityClassName: "system-cluster-critical",
+ RestartPolicy: corev1.RestartPolicyNever,
+ Volumes: []corev1.Volume{
+ {
+ Name: "must-gather-output",
+ VolumeSource: corev1.VolumeSource{
+ EmptyDir: &corev1.EmptyDirVolumeSource{},
+ },
+ },
+ },
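+ // The trailing "wait" container keeps the pod running after the gather containers finish so
+ // the collected output can be copied out of the shared emptyDir volume (e.g. with oc cp).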
+ Containers: append(gatherContainers, corev1.Container{
+ Name: "wait",
+ Image: "registry.redhat.io/ubi9/ubi-minimal",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Command: []string{"/bin/bash", "-c", "sleep infinity"},
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "must-gather-output",
+ MountPath: "/must-gather",
+ },
+ },
+ }),
+ HostNetwork: hostNetwork,
+ NodeSelector: nodeSelector,
+ Tolerations: []corev1.Toleration{
+ {
+ Operator: "Exists",
+ },
+ },
+ },
+ }
+
+ namespaceExists := false
+
+ _, err := params.ResourcesGet(params, &schema.GroupVersionKind{
+ Group: "",
+ Version: "v1",
+ Kind: "Namespaces",
+ }, "", namespace)
+ if err == nil {
+ namespaceExists = true
+ }
+
+ var namespaceObj *corev1.Namespace
+ if !namespaceExists {
+ namespaceObj = &corev1.Namespace{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "Namespace",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: namespace,
+ },
+ }
+ }
+
+ serviceAccount := &corev1.ServiceAccount{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "ServiceAccount",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: serviceAccountName,
+ Namespace: namespace,
+ },
+ }
+
+ clusterRoleBindingName := fmt.Sprintf("%s-must-gather-collector", namespace)
+ clusterRoleBinding := &rbacv1.ClusterRoleBinding{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "rbac.authorization.k8s.io/v1",
+ Kind: "ClusterRoleBinding",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: clusterRoleBindingName,
+ },
+ RoleRef: rbacv1.RoleRef{
+ APIGroup: "rbac.authorization.k8s.io",
+ Kind: "ClusterRole",
+ Name: "cluster-admin",
+ },
+ Subjects: []rbacv1.Subject{
+ {
+ Kind: "ServiceAccount",
+ Name: serviceAccountName,
+ Namespace: namespace,
+ },
+ },
+ }
+
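+ // Use SelfSubjectAccessReview (via CanIUse) to check whether the current credentials may create
+ // each planned resource (and use the hostnetwork SCC), so the plan can warn about likely failures.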
+ allowChecks := map[string]struct {
+ schema.GroupVersionResource
+ name string
+ verb string
+ }{
+ "create_namespace": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "namespace"},
+ verb: "create",
+ },
+ "create_serviceaccount": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "serviceaccount"},
+ verb: "create",
+ },
+ "create_clusterrolebinding": {
+ GroupVersionResource: schema.GroupVersionResource{Group: "rbac.authorization.k8s.io", Version: "v1", Resource: "clusterrolebindings"},
+ verb: "create",
+ },
+ "create_pod": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "pod"},
+ verb: "create",
+ },
+ "use_scc_hostnetwork": {
+ GroupVersionResource: schema.GroupVersionResource{Group: "security.openshift.io", Version: "v1", Resource: "securitycontextconstraints"},
+ name: "hostnetwork-v2",
+ verb: "use",
+ },
+ }
+ isAllowed := make(map[string]bool)
+
+ for k, check := range allowChecks {
+ isAllowed[k] = params.CanIUse(params, &check.GroupVersionResource, "", check.verb)
+ }
+
+ var result strings.Builder
+ result.WriteString("The generated plan contains YAML manifests for must-gather pods and required resources (namespace, serviceaccount, clusterrolebinding). " +
+ "Suggest how the user can apply the manifest and copy results locally (`oc cp` / `kubectl cp`). \n\n",
+ )
+ result.WriteString("Ask the user if they want to apply the plan \n" +
+ "- use the resource_create_or_update tool to apply the manifest \n" +
+ "- alternatively, advise the user to execute `oc apply` / `kubectl apply` instead. \n\n",
+ )
+
+ if !keepResources {
+ result.WriteString("Once the must-gather collection is completed, the user may wish to cleanup the created resources. \n" +
+ "- use the resources_delete tool to delete the namespace and the clusterrolebinding \n" +
+ "- or, execute cleanup using `kubectl delete`. \n\n")
+ }
+
+ if !namespaceExists && isAllowed["create_namespace"] {
+ namespaceYaml, err := yaml.Marshal(namespaceObj)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal namespace to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(namespaceYaml)
+ result.WriteString("```\n\n")
+ }
+
+ if !namespaceExists && !isAllowed["create_namespace"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create namespace(s).\n")
+ }
+
+ // Each YAML manifest is dumped into its own ``` ``` code block
+ // because the resources_create_or_update tool call fails when the content contains more than one resource;
+ // some models are smart enough to detect the error and retry with one resource at a time, though.
+
+ serviceAccountYaml, err := yaml.Marshal(serviceAccount)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal service account to yaml: %w", err)
+ }
+ result.WriteString("```yaml\n")
+ result.Write(serviceAccountYaml)
+ result.WriteString("```\n\n")
+
+ if !isAllowed["create_serviceaccount"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create serviceaccount(s).\n")
+ }
+
+ clusterRoleBindingYaml, err := yaml.Marshal(clusterRoleBinding)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal cluster role binding to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(clusterRoleBindingYaml)
+ result.WriteString("```\n\n")
+
+ if !isAllowed["create_clusterrolebinding"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create clusterrolebinding(s).\n")
+ }
+
+ podYaml, err := yaml.Marshal(pod)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal pod to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(podYaml)
+ result.WriteString("```\n")
+
+ if !isAllowed["create_pod"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s).\n")
+ }
+
+ if hostNetwork && !isAllowed["use_scc_hostnetwork"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s) with hostNetwork: true.\n")
+ }
+
+ return api.NewToolCallResult(result.String(), nil), nil
+}
+
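+// getComponentImages returns the must-gather images advertised by ClusterOperators and
+// ClusterServiceVersions through the operators.openshift.io/must-gather-image annotation.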
+func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
+ var images []string
+ appendImageFromAnnotation := func(obj runtime.Object) error {
+ unstruct, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
+ if err != nil {
+ return err
+ }
+
+ u := unstructured.Unstructured{Object: unstruct}
+ annotations := u.GetAnnotations()
+ if annotations[mgAnnotation] != "" {
+ images = append(images, annotations[mgAnnotation])
+ }
+
+ return nil
+ }
+
+ clusterOperatorsList, err := params.ResourcesList(params, &schema.GroupVersionKind{
+ Group: "config.openshift.io",
+ Version: "v1",
+ Kind: "ClusterOperator",
+ }, "", internalk8s.ResourceListOptions{})
+ if err != nil {
+ return nil, err
+ }
+
+ if err := clusterOperatorsList.EachListItem(appendImageFromAnnotation); err != nil {
+ return images, err
+ }
+
+ csvList, err := params.ResourcesList(params, &schema.GroupVersionKind{
+ Group: "operators.coreos.com",
+ Version: "v1alpha1",
+ Kind: "ClusterServiceVersion",
+ }, "", internalk8s.ResourceListOptions{})
+ if err != nil {
+ return images, err
+ }
+
+ err = csvList.EachListItem(appendImageFromAnnotation)
+ return images, err
+}
+
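+// parseNodeSelector converts a comma-separated list of key=value pairs
+// (e.g. "kubernetes.io/os=linux,zone=a") into a label map; entries that are not
+// key=value pairs or that have an empty key are ignored.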
+func parseNodeSelector(selector string) map[string]string {
+ result := make(map[string]string)
+ pairs := strings.Split(selector, ",")
+ for _, pair := range pairs {
+ kv := strings.SplitN(strings.TrimSpace(pair), "=", 2)
+ if len(kv) == 2 && strings.TrimSpace(kv[0]) != "" {
+ result[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
+ }
+ }
+ return result
+}
diff --git a/pkg/toolsets/core/toolset.go b/pkg/toolsets/core/toolset.go
index 9f88c7aa..3f156eed 100644
--- a/pkg/toolsets/core/toolset.go
+++ b/pkg/toolsets/core/toolset.go
@@ -26,6 +26,7 @@ func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool {
initNamespaces(o),
initPods(),
initResources(o),
+ initMustGatherPlan(o),
)
}