Skip to content

Commit f4e2c9a

Browse files
feat: add configurable telemetry to ModelAsService CRD
1 parent fa1fcdc commit f4e2c9a

File tree

6 files changed

+227
-0
lines changed

6 files changed

+227
-0
lines changed

api/components/v1alpha1/modelsasservice_types.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,43 @@ type ModelsAsServiceSpec struct {
6161
// APIKeys contains configuration for API key management.
6262
// +kubebuilder:validation:Optional
6363
APIKeys *APIKeysConfig `json:"apiKeys,omitempty"`
64+
65+
// Telemetry contains configuration for telemetry and metrics collection.
66+
// +kubebuilder:validation:Optional
67+
Telemetry *TelemetryConfig `json:"telemetry,omitempty"`
68+
}
69+
70+
// TelemetryConfig defines configuration for telemetry collection.
71+
type TelemetryConfig struct {
72+
// Metrics contains configuration for metric dimensions/labels.
73+
// +kubebuilder:validation:Optional
74+
Metrics *MetricsConfig `json:"metrics,omitempty"`
75+
}
76+
77+
// MetricsConfig selects the dimensions (labels) recorded on telemetry metrics.
// Every dimension is an independent on/off switch so operators can trade
// detail for lower metric cardinality and storage cost.
type MetricsConfig struct {
	// CaptureOrganization controls whether the organization_id label is recorded.
	// Enabled by default.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=true
	CaptureOrganization *bool `json:"captureOrganization,omitempty"`

	// CaptureUser controls whether the user label is recorded. Enabled by default.
	// Note: this dimension is high-cardinality and may be switched off for
	// privacy (GDPR) compliance.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=true
	CaptureUser *bool `json:"captureUser,omitempty"`

	// CaptureGroup controls whether the group label, used for team-based
	// chargeback, is recorded.
	// Note: this dimension is high-cardinality and is disabled by default.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=false
	CaptureGroup *bool `json:"captureGroup,omitempty"`

	// CaptureModelUsage controls whether the model label is recorded.
	// Enabled by default.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default=true
	CaptureModelUsage *bool `json:"captureModelUsage,omitempty"`
}
65102

66103
// APIKeysConfig defines configuration options for API key management.

api/components/v1alpha1/zz_generated.deepcopy.go

Lines changed: 60 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/controller/components/modelsasservice/modelsasservice_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
6060
// Third-party CRDs that may not be available in all environments.
6161
OwnsGVK(gvk.AuthPolicyv1, reconciler.Dynamic(reconciler.CrdExists(gvk.AuthPolicyv1))).
6262
OwnsGVK(gvk.DestinationRule, reconciler.Dynamic(reconciler.CrdExists(gvk.DestinationRule))).
63+
OwnsGVK(gvk.TelemetryPolicyv1alpha1, reconciler.Dynamic(reconciler.CrdExists(gvk.TelemetryPolicyv1alpha1))).
6364
Watches(
6465
&extv1.CustomResourceDefinition{},
6566
reconciler.WithEventHandler(
@@ -85,6 +86,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
8586
)).
8687
// WithAction(releases.NewAction()). // TODO: Do we need this? How to fix annotation of "platform.opendatahub.io/version:0.0.0"
8788
WithAction(configureGatewayNamespaceResources).
89+
WithAction(configureTelemetryPolicy).
8890
WithAction(configureConfigHashAnnotation).
8991
WithAction(deploy.NewAction(
9092
deploy.WithCache(),

internal/controller/components/modelsasservice/modelsasservice_controller_actions.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,123 @@ func configureDestinationRule(log logr.Logger, resource *unstructured.Unstructur
219219
resource.SetNamespace(gatewayNamespace)
220220
}
221221

222+
// configureTelemetryPolicy is a post-render action that creates a TelemetryPolicy
223+
// resource based on the ModelsAsService telemetry configuration.
224+
//
225+
// The TelemetryPolicy is generated programmatically (not from manifests) because
226+
// its content is entirely dynamic based on the spec.telemetry.metrics configuration.
227+
func configureTelemetryPolicy(ctx context.Context, rr *types.ReconciliationRequest) error {
228+
log := logf.FromContext(ctx)
229+
230+
maas, ok := rr.Instance.(*componentApi.ModelsAsService)
231+
if !ok {
232+
return fmt.Errorf("resource instance %v is not a componentApi.ModelsAsService", rr.Instance)
233+
}
234+
235+
gatewayNamespace := maas.Spec.GatewayRef.Namespace
236+
gatewayName := maas.Spec.GatewayRef.Name
237+
238+
// Build the labels map based on telemetry configuration
239+
metricLabels := buildTelemetryLabels(log, maas.Spec.Telemetry)
240+
241+
// Create the TelemetryPolicy resource
242+
telemetryPolicy := &unstructured.Unstructured{
243+
Object: map[string]interface{}{
244+
"apiVersion": "extensions.kuadrant.io/v1alpha1",
245+
"kind": "TelemetryPolicy",
246+
"metadata": map[string]interface{}{
247+
"name": TelemetryPolicyName,
248+
"namespace": gatewayNamespace,
249+
"labels": map[string]interface{}{
250+
"app.kubernetes.io/part-of": "maas-observability",
251+
},
252+
},
253+
"spec": map[string]interface{}{
254+
"targetRef": map[string]interface{}{
255+
"group": "gateway.networking.k8s.io",
256+
"kind": "Gateway",
257+
"name": gatewayName,
258+
},
259+
"metrics": map[string]interface{}{
260+
"default": map[string]interface{}{
261+
"labels": metricLabels,
262+
},
263+
},
264+
},
265+
},
266+
}
267+
268+
log.V(2).Info("Creating TelemetryPolicy",
269+
"name", TelemetryPolicyName,
270+
"namespace", gatewayNamespace,
271+
"targetGateway", gatewayName,
272+
"labels", metricLabels)
273+
274+
// Add to resources for deployment
275+
rr.Resources = append(rr.Resources, *telemetryPolicy)
276+
277+
return nil
278+
}
279+
280+
// buildTelemetryLabels creates the metric labels map based on the telemetry configuration.
281+
// It includes always-on dimensions and configurable dimensions based on MetricsConfig settings.
282+
func buildTelemetryLabels(log logr.Logger, config *componentApi.TelemetryConfig) map[string]interface{} {
283+
// Default values when config is nil or metrics is nil
284+
captureOrganization := true
285+
captureUser := true
286+
captureGroup := false
287+
captureModelUsage := true
288+
289+
if config != nil && config.Metrics != nil {
290+
metrics := config.Metrics
291+
if metrics.CaptureOrganization != nil {
292+
captureOrganization = *metrics.CaptureOrganization
293+
}
294+
if metrics.CaptureUser != nil {
295+
captureUser = *metrics.CaptureUser
296+
}
297+
if metrics.CaptureGroup != nil {
298+
captureGroup = *metrics.CaptureGroup
299+
}
300+
if metrics.CaptureModelUsage != nil {
301+
captureModelUsage = *metrics.CaptureModelUsage
302+
}
303+
}
304+
305+
// Always-on dimensions - essential for billing and access control
306+
labels := map[string]interface{}{
307+
"subscription": "auth.identity.selected_subscription",
308+
"cost_center": "auth.identity.costCenter",
309+
"tier": "auth.identity.tier",
310+
}
311+
312+
// Configurable dimensions
313+
if captureOrganization {
314+
labels["organization_id"] = "auth.identity.organizationId"
315+
}
316+
317+
if captureUser {
318+
labels["user"] = "auth.identity.userid"
319+
}
320+
321+
if captureGroup {
322+
labels["group"] = "auth.identity.group"
323+
}
324+
325+
if captureModelUsage {
326+
labels["model"] = "responseBodyJSON(\"/model\")"
327+
}
328+
329+
log.V(4).Info("Built telemetry labels",
330+
"captureOrganization", captureOrganization,
331+
"captureUser", captureUser,
332+
"captureGroup", captureGroup,
333+
"captureModelUsage", captureModelUsage,
334+
"totalLabels", len(labels))
335+
336+
return labels
337+
}
338+
222339
// configureConfigHashAnnotation adds a hash annotation to the maas-api Deployment
223340
// to trigger rolling restarts when the ConfigMap changes.
224341
// This is necessary because env vars sourced via valueFrom.configMapKeyRef

internal/controller/components/modelsasservice/modelsasservice_support.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ const (
4545
// the same namespace as the gateway it targets.
4646
GatewayDestinationRuleName = "maas-api-backend-tls"
4747

48+
// TelemetryPolicyName is the name of the TelemetryPolicy resource that
49+
// configures metric labels for the MaaS gateway. This resource needs to be
50+
// deployed to the same namespace as the gateway it targets.
51+
TelemetryPolicyName = "maas-telemetry"
52+
4853
// MaaSParametersConfigMapName is the name of the ConfigMap that stores
4954
// MaaS configuration parameters (generated by kustomize from params.env).
5055
MaaSParametersConfigMapName = "maas-parameters"

pkg/cluster/gvk/gvk.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,12 @@ var (
675675
Kind: "RateLimitPolicy",
676676
}
677677

678+
TelemetryPolicyv1alpha1 = schema.GroupVersionKind{
679+
Group: "extensions.kuadrant.io",
680+
Version: "v1alpha1",
681+
Kind: "TelemetryPolicy",
682+
}
683+
678684
AuthConfigv1beta3 = schema.GroupVersionKind{
679685
Group: "authorino.kuadrant.io",
680686
Version: "v1beta3",

0 commit comments

Comments
 (0)