Skip to content

Commit 9b30e41

Browse files
committed
Reduce MCPServer CRD size by using runtime.RawExtension for PodTemplateSpec
The MCPServer CRD was too large (~9500 lines) to apply without server-side apply due to the embedded PodTemplateSpec taking up ~8500 lines. This was causing issues as reported in GitHub issue #2013. Changed the PodTemplateSpec field from a strongly-typed corev1.PodTemplateSpec to runtime.RawExtension, which stores the raw JSON without schema validation at the CRD level. This reduces the CRD size from ~9500 lines to 651 lines (93% reduction). The solution maintains full backwards compatibility - users can still use the same YAML structure. Validation now happens at runtime in the operator, with proper error handling via Kubernetes events and status conditions to notify users when invalid PodTemplateSpec data is provided. Key changes: - Modified MCPServer type to use runtime.RawExtension for PodTemplateSpec - Updated PodTemplateSpecBuilder to unmarshal and validate at runtime - Added event recording and status conditions for validation errors - Added comprehensive tests for invalid PodTemplateSpec scenarios - Fixed race conditions in parallel tests Fixes #2013
1 parent 90993f6 commit 9b30e41

23 files changed

+768
-8524
lines changed

cmd/thv-operator/api/v1alpha1/mcpserver_types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package v1alpha1
22

33
import (
4-
corev1 "k8s.io/api/core/v1"
54
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5+
"k8s.io/apimachinery/pkg/runtime"
66
)
77

88
// Condition types for MCPServer
@@ -85,8 +85,11 @@ type MCPServerSpec struct {
8585
// This allows for customizing the pod configuration beyond what is provided by the other fields.
8686
// Note that to modify the specific container the MCP server runs in, you must specify
8787
// the `mcp` container name in the PodTemplateSpec.
88+
// This field accepts a PodTemplateSpec object as JSON/YAML.
8889
// +optional
89-
PodTemplateSpec *corev1.PodTemplateSpec `json:"podTemplateSpec,omitempty"`
90+
// +kubebuilder:pruning:PreserveUnknownFields
91+
// +kubebuilder:validation:Type=object
92+
PodTemplateSpec *runtime.RawExtension `json:"podTemplateSpec,omitempty"`
9093

9194
// ResourceOverrides allows overriding annotations and labels for resources created by the operator
9295
// +optional

cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package controllers
2+
3+
import (
4+
"encoding/json"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
corev1 "k8s.io/api/core/v1"
9+
"k8s.io/apimachinery/pkg/runtime"
10+
)
11+
12+
// podTemplateSpecToRawExtension is a test helper to convert PodTemplateSpec to RawExtension
13+
func podTemplateSpecToRawExtension(t *testing.T, pts *corev1.PodTemplateSpec) *runtime.RawExtension {
14+
t.Helper()
15+
if pts == nil {
16+
return nil
17+
}
18+
raw, err := json.Marshal(pts)
19+
require.NoError(t, err, "Failed to marshal PodTemplateSpec")
20+
return &runtime.RawExtension{Raw: raw}
21+
}

cmd/thv-operator/controllers/mcpserver_controller.go

Lines changed: 132 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/apimachinery/pkg/types"
2727
"k8s.io/apimachinery/pkg/util/intstr"
2828
"k8s.io/client-go/rest"
29+
"k8s.io/client-go/tools/record"
2930
ctrl "sigs.k8s.io/controller-runtime"
3031
"sigs.k8s.io/controller-runtime/pkg/client"
3132
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
@@ -40,6 +41,7 @@ import (
4041
type MCPServerReconciler struct {
4142
client.Client
4243
Scheme *runtime.Scheme
44+
Recorder record.EventRecorder
4345
platformDetector kubernetes.PlatformDetector
4446
detectedPlatform kubernetes.Platform
4547
platformOnce sync.Once
@@ -186,6 +188,15 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
186188
return ctrl.Result{Requeue: true}, nil
187189
}
188190

191+
// Validate PodTemplateSpec early - before other validations
192+
// This ensures we fail fast if the spec is invalid
193+
if !r.validateAndUpdatePodTemplateStatus(ctx, mcpServer) {
194+
// Invalid PodTemplateSpec - return without error to avoid infinite retries
195+
// The user must fix the spec and the next reconciliation will retry
196+
ctxLogger.Info("Skipping reconciliation due to invalid PodTemplateSpec")
197+
return ctrl.Result{}, nil
198+
}
199+
189200
// Check if MCPToolConfig is referenced and handle it
190201
if err := r.handleToolConfig(ctx, mcpServer); err != nil {
191202
ctxLogger.Error(err, "Failed to handle MCPToolConfig")
@@ -206,6 +217,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
206217
setImageValidationCondition(mcpServer, metav1.ConditionTrue,
207218
mcpv1alpha1.ConditionReasonImageValidationSkipped,
208219
"Image validation was not performed (no enforcement configured)")
220+
// Update status to persist the condition
221+
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
222+
ctxLogger.Error(statusErr, "Failed to update MCPServer status after image validation")
223+
}
209224
} else if goerr.Is(err, validation.ErrImageInvalid) {
210225
ctxLogger.Error(err, "MCPServer image validation failed", "image", mcpServer.Spec.Image)
211226
// Update status to reflect validation failure
@@ -236,6 +251,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
236251
setImageValidationCondition(mcpServer, metav1.ConditionTrue,
237252
mcpv1alpha1.ConditionReasonImageValidationSuccess,
238253
"Image validation passed - image found in enforced registries")
254+
// Update status to persist the condition
255+
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
256+
ctxLogger.Error(statusErr, "Failed to update MCPServer status after image validation")
257+
}
239258
}
240259

241260
// Check if the MCPServer instance is marked to be deleted
@@ -406,6 +425,64 @@ func setImageValidationCondition(mcpServer *mcpv1alpha1.MCPServer, status metav1
406425
})
407426
}
408427

428+
// validateAndUpdatePodTemplateStatus validates the PodTemplateSpec and updates the MCPServer status
429+
// with appropriate conditions and events
430+
func (r *MCPServerReconciler) validateAndUpdatePodTemplateStatus(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) bool {
431+
ctxLogger := log.FromContext(ctx)
432+
433+
// Only validate if PodTemplateSpec is provided
434+
if mcpServer.Spec.PodTemplateSpec == nil || mcpServer.Spec.PodTemplateSpec.Raw == nil {
435+
// No PodTemplateSpec provided, validation passes
436+
return true
437+
}
438+
439+
_, err := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec)
440+
if err != nil {
441+
// Record event for invalid PodTemplateSpec
442+
r.Recorder.Eventf(mcpServer, corev1.EventTypeWarning, "InvalidPodTemplateSpec",
443+
"Failed to parse PodTemplateSpec: %v. Deployment blocked until PodTemplateSpec is fixed.", err)
444+
445+
// Set phase and message
446+
mcpServer.Status.Phase = mcpv1alpha1.MCPServerPhaseFailed
447+
mcpServer.Status.Message = fmt.Sprintf("Invalid PodTemplateSpec: %v", err)
448+
449+
// Set condition for invalid PodTemplateSpec
450+
meta.SetStatusCondition(&mcpServer.Status.Conditions, metav1.Condition{
451+
Type: "PodTemplateValid",
452+
Status: metav1.ConditionFalse,
453+
ObservedGeneration: mcpServer.Generation,
454+
Reason: "InvalidPodTemplateSpec",
455+
Message: fmt.Sprintf("Failed to parse PodTemplateSpec: %v. Deployment blocked until fixed.", err),
456+
})
457+
458+
// Update status with the condition
459+
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
460+
ctxLogger.Error(statusErr, "Failed to update MCPServer status with PodTemplateSpec validation")
461+
return false
462+
}
463+
464+
ctxLogger.Error(err, "PodTemplateSpec validation failed")
465+
return false
466+
}
467+
468+
// Set condition for valid PodTemplateSpec
469+
meta.SetStatusCondition(&mcpServer.Status.Conditions, metav1.Condition{
470+
Type: "PodTemplateValid",
471+
Status: metav1.ConditionTrue,
472+
ObservedGeneration: mcpServer.Generation,
473+
Reason: "ValidPodTemplateSpec",
474+
Message: "PodTemplateSpec is valid",
475+
})
476+
477+
// Update status with the condition
478+
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
479+
ctxLogger.Error(statusErr, "Failed to update MCPServer status with PodTemplateSpec validation")
480+
}
481+
482+
ctxLogger.V(1).Info("PodTemplateSpec validation completed successfully")
483+
return true
484+
}
485+
409486
// handleRestartAnnotation checks if the restart annotation has been updated and triggers a restart if needed
410487
// Returns true if a restart was triggered and the reconciliation should be requeued
411488
func (r *MCPServerReconciler) handleRestartAnnotation(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) (bool, error) {
@@ -755,17 +832,22 @@ func (r *MCPServerReconciler) deploymentForMCPServer(ctx context.Context, m *mcp
755832
useConfigMap := os.Getenv("TOOLHIVE_USE_CONFIGMAP") == trueValue
756833
if useConfigMap {
757834
// Also add pod template patch for secrets (same as regular flags approach)
758-
finalPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec).
759-
WithSecrets(m.Spec.Secrets).
760-
Build()
761-
// Add pod template patch if we have one
762-
if finalPodTemplateSpec != nil {
763-
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
764-
if err != nil {
765-
ctxLogger := log.FromContext(ctx)
766-
ctxLogger.Error(err, "Failed to marshal pod template spec")
767-
} else {
768-
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
835+
builder, err := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec)
836+
if err != nil {
837+
ctxLogger := log.FromContext(ctx)
838+
ctxLogger.Error(err, "Invalid PodTemplateSpec in MCPServer spec, continuing without customizations")
839+
// Continue without pod template patch - the deployment will still work
840+
} else {
841+
finalPodTemplateSpec := builder.WithSecrets(m.Spec.Secrets).Build()
842+
// Add pod template patch if we have one
843+
if finalPodTemplateSpec != nil {
844+
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
845+
if err != nil {
846+
ctxLogger := log.FromContext(ctx)
847+
ctxLogger.Error(err, "Failed to marshal pod template spec")
848+
} else {
849+
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
850+
}
769851
}
770852
}
771853

@@ -812,18 +894,26 @@ func (r *MCPServerReconciler) deploymentForMCPServer(ctx context.Context, m *mcp
812894
defaultSA := mcpServerServiceAccountName(m.Name)
813895
serviceAccount = &defaultSA
814896
}
815-
finalPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec).
816-
WithServiceAccount(serviceAccount).
817-
WithSecrets(m.Spec.Secrets).
818-
Build()
819-
// Add pod template patch if we have one
820-
if finalPodTemplateSpec != nil {
821-
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
822-
if err != nil {
823-
ctxLogger := log.FromContext(ctx)
824-
ctxLogger.Error(err, "Failed to marshal pod template spec")
825-
} else {
826-
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
897+
898+
builder, err := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec)
899+
if err != nil {
900+
ctxLogger := log.FromContext(ctx)
901+
ctxLogger.Error(err, "Invalid PodTemplateSpec in MCPServer spec, continuing without customizations")
902+
// Continue without pod template patch - the deployment will still work
903+
} else {
904+
finalPodTemplateSpec := builder.
905+
WithServiceAccount(serviceAccount).
906+
WithSecrets(m.Spec.Secrets).
907+
Build()
908+
// Add pod template patch if we have one
909+
if finalPodTemplateSpec != nil {
910+
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
911+
if err != nil {
912+
ctxLogger := log.FromContext(ctx)
913+
ctxLogger.Error(err, "Failed to marshal pod template spec")
914+
} else {
915+
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
916+
}
827917
}
828918
}
829919

@@ -1393,21 +1483,19 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
13931483
// TODO: Add more comprehensive checks for PodTemplateSpec changes beyond just the args
13941484
// This would involve comparing the actual pod template spec fields with what would be
13951485
// generated by the operator, rather than just checking the command-line arguments.
1396-
if mcpServer.Spec.PodTemplateSpec != nil {
1397-
podTemplatePatch, err := json.Marshal(mcpServer.Spec.PodTemplateSpec)
1398-
if err == nil {
1399-
podTemplatePatchArg := fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch))
1400-
found := false
1401-
for _, arg := range container.Args {
1402-
if arg == podTemplatePatchArg {
1403-
found = true
1404-
break
1405-
}
1406-
}
1407-
if !found {
1408-
return true
1486+
if mcpServer.Spec.PodTemplateSpec != nil && mcpServer.Spec.PodTemplateSpec.Raw != nil {
1487+
// Use the raw bytes directly since PodTemplateSpec is now a RawExtension
1488+
podTemplatePatchArg := fmt.Sprintf("--k8s-pod-patch=%s", string(mcpServer.Spec.PodTemplateSpec.Raw))
1489+
found := false
1490+
for _, arg := range container.Args {
1491+
if arg == podTemplatePatchArg {
1492+
found = true
1493+
break
14091494
}
14101495
}
1496+
if !found {
1497+
return true
1498+
}
14111499
}
14121500

14131501
// Check if the container port has changed
@@ -1461,7 +1549,14 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
14611549
defaultSA := mcpServerServiceAccountName(mcpServer.Name)
14621550
serviceAccount = &defaultSA
14631551
}
1464-
expectedPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec).
1552+
1553+
builder, err := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec)
1554+
if err != nil {
1555+
// If we can't parse the PodTemplateSpec, consider it as needing update
1556+
return true
1557+
}
1558+
1559+
expectedPodTemplateSpec := builder.
14651560
WithServiceAccount(serviceAccount).
14661561
WithSecrets(mcpServer.Spec.Secrets).
14671562
Build()
@@ -1527,7 +1622,6 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
15271622
if !equalOpenTelemetryArgs(otelConfig, container.Args) {
15281623
return true
15291624
}
1530-
15311625
}
15321626

15331627
// Check if the service account name has changed

0 commit comments

Comments
 (0)