Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions api/components/v1alpha1/modelsasservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,43 @@ type ModelsAsServiceSpec struct {
// APIKeys contains configuration for API key management.
// +kubebuilder:validation:Optional
APIKeys *APIKeysConfig `json:"apiKeys,omitempty"`

// Telemetry contains configuration for telemetry and metrics collection.
// +kubebuilder:validation:Optional
Telemetry *TelemetryConfig `json:"telemetry,omitempty"`
}

// TelemetryConfig defines configuration for telemetry collection.
// It is the type of ModelsAsServiceSpec.Telemetry and currently only groups
// the metrics settings.
type TelemetryConfig struct {
// Metrics contains configuration for metric dimensions/labels.
// When omitted, every dimension falls back to its default
// (organization, user and model captured; group not captured).
// +kubebuilder:validation:Optional
Metrics *MetricsConfig `json:"metrics,omitempty"`
}

// MetricsConfig defines which dimensions (labels) are captured in telemetry metrics.
// Each dimension can be enabled or disabled to control metric cardinality and storage costs.
//
// Every field is a pointer so that "not set" can be told apart from an explicit false;
// an unset field is resolved to the default stated on that field.
// The subscription, cost_center and tier labels are always emitted and are not configurable here.
type MetricsConfig struct {
// CaptureOrganization enables the organization_id label on metrics.
// Treated as true when unset.
// +kubebuilder:default=true
// +kubebuilder:validation:Optional
CaptureOrganization *bool `json:"captureOrganization,omitempty"`

// CaptureUser enables the user label on metrics.
// Note: This is a high-cardinality dimension and may be disabled for privacy (GDPR) compliance.
// Treated as true when unset.
// +kubebuilder:default=true
// +kubebuilder:validation:Optional
CaptureUser *bool `json:"captureUser,omitempty"`

// CaptureGroup enables the group label on metrics for team-based chargeback.
// Note: This is a high-cardinality dimension and is disabled by default.
// Treated as false when unset.
// +kubebuilder:default=false
// +kubebuilder:validation:Optional
CaptureGroup *bool `json:"captureGroup,omitempty"`

// CaptureModelUsage enables the model label on metrics.
// Treated as true when unset.
// +kubebuilder:default=true
// +kubebuilder:validation:Optional
CaptureModelUsage *bool `json:"captureModelUsage,omitempty"`
}

// APIKeysConfig defines configuration options for API key management.
Expand Down
60 changes: 60 additions & 0 deletions api/components/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions docs/api-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,26 @@ _Appears in:_
| `releases` _[ComponentRelease](#componentrelease) array_ | | | |


#### MetricsConfig



MetricsConfig defines which dimensions (labels) are captured in telemetry metrics.
Each dimension can be enabled or disabled to control metric cardinality and storage costs.



_Appears in:_
- [TelemetryConfig](#telemetryconfig)

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `captureOrganization` _boolean_ | CaptureOrganization enables the organization_id label on metrics. | true | Optional: \{\} <br /> |
| `captureUser` _boolean_ | CaptureUser enables the user label on metrics.<br />Note: This is a high-cardinality dimension and may be disabled for privacy (GDPR) compliance. | true | Optional: \{\} <br /> |
| `captureGroup` _boolean_ | CaptureGroup enables the group label on metrics for team-based chargeback.<br />Note: This is a high-cardinality dimension and is disabled by default. | false | Optional: \{\} <br /> |
| `captureModelUsage` _boolean_ | CaptureModelUsage enables the model label on metrics. | true | Optional: \{\} <br /> |


#### ModelController


Expand Down Expand Up @@ -1455,6 +1475,7 @@ _Appears in:_
| --- | --- | --- | --- |
| `gatewayRef` _[GatewayRef](#gatewayref)_ | GatewayRef specifies which Gateway (Gateway API) to use for exposing model endpoints.<br />If omitted, defaults to openshift-ingress/maas-default-gateway. | | Optional: \{\} <br /> |
| `apiKeys` _[APIKeysConfig](#apikeysconfig)_ | APIKeys contains configuration for API key management. | | Optional: \{\} <br /> |
| `telemetry` _[TelemetryConfig](#telemetryconfig)_ | Telemetry contains configuration for telemetry and metrics collection. | | Optional: \{\} <br /> |


#### ModelsAsServiceStatus
Expand Down Expand Up @@ -1683,6 +1704,22 @@ _Appears in:_
| `releases` _[ComponentRelease](#componentrelease) array_ | | | |


#### TelemetryConfig



TelemetryConfig defines configuration for telemetry collection.



_Appears in:_
- [ModelsAsServiceSpec](#modelsasservicespec)

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `metrics` _[MetricsConfig](#metricsconfig)_ | Metrics contains configuration for metric dimensions/labels. | | Optional: \{\} <br /> |


#### Trainer


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
// Third-party CRDs that may not be available in all environments.
OwnsGVK(gvk.AuthPolicyv1, reconciler.Dynamic(reconciler.CrdExists(gvk.AuthPolicyv1))).
OwnsGVK(gvk.DestinationRule, reconciler.Dynamic(reconciler.CrdExists(gvk.DestinationRule))).
OwnsGVK(gvk.TelemetryPolicyv1alpha1, reconciler.Dynamic(reconciler.CrdExists(gvk.TelemetryPolicyv1alpha1))).
Watches(
&extv1.CustomResourceDefinition{},
reconciler.WithEventHandler(
Expand All @@ -85,6 +86,7 @@ func (s *componentHandler) NewComponentReconciler(ctx context.Context, mgr ctrl.
)).
// WithAction(releases.NewAction()). // TODO: Do we need this? How to fix annotation of "platform.opendatahub.io/version:0.0.0"
WithAction(configureGatewayNamespaceResources).
WithAction(configureTelemetryPolicy).
WithAction(configureConfigHashAnnotation).
WithAction(deploy.NewAction(
deploy.WithCache(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,123 @@ func configureDestinationRule(log logr.Logger, resource *unstructured.Unstructur
resource.SetNamespace(gatewayNamespace)
}

// configureTelemetryPolicy is a reconciliation action that appends a
// TelemetryPolicy (extensions.kuadrant.io/v1alpha1) to rr.Resources.
//
// The policy is assembled in code rather than read from a manifest: its metric
// labels come from buildTelemetryLabels applied to spec.telemetry, it targets the
// Gateway named by spec.gatewayRef, and it is placed in that Gateway's namespace.
//
// An error is returned only when rr.Instance is not a *componentApi.ModelsAsService.
//
// NOTE(review): the policy is appended unconditionally — confirm the deploy step
// copes with clusters where the TelemetryPolicy CRD is not installed.
func configureTelemetryPolicy(ctx context.Context, rr *types.ReconciliationRequest) error {
	logger := logf.FromContext(ctx)

	instance, isMaaS := rr.Instance.(*componentApi.ModelsAsService)
	if !isMaaS {
		return fmt.Errorf("resource instance %v is not a componentApi.ModelsAsService", rr.Instance)
	}

	targetNamespace, targetGateway := instance.Spec.GatewayRef.Namespace, instance.Spec.GatewayRef.Name

	// Resolve which metric dimensions the user asked for.
	labels := buildTelemetryLabels(logger, instance.Spec.Telemetry)

	// The policy lives next to the Gateway it is attached to.
	metadata := map[string]interface{}{
		"name":      TelemetryPolicyName,
		"namespace": targetNamespace,
		"labels": map[string]interface{}{
			"app.kubernetes.io/part-of": "maas-observability",
		},
	}

	spec := map[string]interface{}{
		"targetRef": map[string]interface{}{
			"group": "gateway.networking.k8s.io",
			"kind":  "Gateway",
			"name":  targetGateway,
		},
		"metrics": map[string]interface{}{
			"default": map[string]interface{}{
				"labels": labels,
			},
		},
	}

	policy := unstructured.Unstructured{
		Object: map[string]interface{}{
			"apiVersion": "extensions.kuadrant.io/v1alpha1",
			"kind":       "TelemetryPolicy",
			"metadata":   metadata,
			"spec":       spec,
		},
	}

	logger.V(2).Info("Creating TelemetryPolicy",
		"name", TelemetryPolicyName,
		"namespace", targetNamespace,
		"targetGateway", targetGateway,
		"labels", labels)

	// Hand the generated object to the later deployment step.
	rr.Resources = append(rr.Resources, policy)

	return nil
}

// buildTelemetryLabels returns the label-name to value-expression map placed in
// the TelemetryPolicy metrics section.
//
// "subscription", "cost_center" and "tier" are always present. "organization_id",
// "user", "group" and "model" are added only when the matching Capture* switch is
// on. A switch that is not set (nil config, nil Metrics, or nil field) falls back
// to its default: group is off, the other three are on.
//
// The resolved switches and the final label count are logged at verbosity 4.
func buildTelemetryLabels(log logr.Logger, config *componentApi.TelemetryConfig) map[string]interface{} {
	// Start from an all-nil settings value so a missing config needs no special casing.
	var settings componentApi.MetricsConfig
	if config != nil && config.Metrics != nil {
		settings = *config.Metrics
	}

	// orDefault yields the explicit setting when present, otherwise the fallback.
	orDefault := func(override *bool, fallback bool) bool {
		if override == nil {
			return fallback
		}
		return *override
	}

	withOrganization := orDefault(settings.CaptureOrganization, true)
	withUser := orDefault(settings.CaptureUser, true)
	withGroup := orDefault(settings.CaptureGroup, false)
	withModel := orDefault(settings.CaptureModelUsage, true)

	// Dimensions that are emitted regardless of configuration.
	labels := map[string]interface{}{
		"subscription": "auth.identity.selected_subscription",
		"cost_center":  "auth.identity.costCenter",
		"tier":         "auth.identity.tier",
	}

	// Dimensions that can be switched on or off.
	optional := []struct {
		enabled    bool
		name, expr string
	}{
		{withOrganization, "organization_id", "auth.identity.organizationId"},
		{withUser, "user", "auth.identity.userid"},
		{withGroup, "group", "auth.identity.group"},
		{withModel, "model", "responseBodyJSON(\"/model\")"},
	}
	for _, dim := range optional {
		if dim.enabled {
			labels[dim.name] = dim.expr
		}
	}

	log.V(4).Info("Built telemetry labels",
		"captureOrganization", withOrganization,
		"captureUser", withUser,
		"captureGroup", withGroup,
		"captureModelUsage", withModel,
		"totalLabels", len(labels))

	return labels
}

// configureConfigHashAnnotation adds a hash annotation to the maas-api Deployment
// to trigger rolling restarts when the ConfigMap changes.
// This is necessary because env vars sourced via valueFrom.configMapKeyRef
Expand Down
Loading
Loading