From 9decafd6bff8a6252613f00b1fa31409a8be9545 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Mon, 7 Oct 2024 09:43:14 +0100 Subject: [PATCH 1/2] Add KServe Logger TLS bundle support --- controllers/config_maps.go | 94 +++++++++++++++++++ controllers/deployment.go | 42 ++++++++- controllers/inference_services.go | 42 ++++++++- .../templates/service/deployment.tmpl.yaml | 14 +++ controllers/trustyaiservice_controller.go | 16 ++++ 5 files changed, 203 insertions(+), 5 deletions(-) diff --git a/controllers/config_maps.go b/controllers/config_maps.go index a4885ab46..3ce84d8c2 100644 --- a/controllers/config_maps.go +++ b/controllers/config_maps.go @@ -2,13 +2,29 @@ package controllers import ( "context" + "encoding/json" "fmt" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" ) +// KServe InferenceLogger config +type LoggerConfig struct { + Image string `json:"image"` + MemoryRequest string `json:"memoryRequest"` + MemoryLimit string `json:"memoryLimit"` + CpuRequest string `json:"cpuRequest"` + CpuLimit string `json:"cpuLimit"` + DefaultUrl string `json:"defaultUrl"` + CaBundle *string `json:"caBundle,omitempty"` + CaCertFile *string `json:"caCertFile,omitempty"` +} + // getImageFromConfigMap gets a custom image value from a ConfigMap in the operator's namespace func (r *TrustyAIServiceReconciler) getImageFromConfigMap(ctx context.Context, key string, defaultImage string) (string, error) { if r.Namespace != "" { @@ -101,3 +117,81 @@ func (r *TrustyAIServiceReconciler) getConfigMapNamesWithLabel(ctx context.Conte return names, nil } + +func (r *TrustyAIServiceReconciler) getKServeLoggerConfig(ctx context.Context) (*LoggerConfig, error) { + if r.Namespace == "" { + return nil, nil + } + + configMapKey := types.NamespacedName{ + Namespace: r.Namespace, + Name: "inferenceservice-config", + } + + var cm corev1.ConfigMap + + if err := r.Get(ctx, configMapKey, &cm); err != nil { + if errors.IsNotFound(err) { + // ConfigMap not found + return nil, nil + } + return nil, fmt.Errorf("error reading configmap %s: %v", configMapKey, err) + } + + loggerData, ok := cm.Data["logger"] + if !ok { + log.FromContext(ctx).Info("No KServe logger key found in inferenceservice-config") + return nil, nil + } + + var loggerConfig LoggerConfig + err := json.Unmarshal([]byte(loggerData), &loggerConfig) + if err != nil { + return nil, fmt.Errorf("error parsing logger config: %v", err) + } + + return &loggerConfig, nil +} + +// createOrUpdateLoggerCaConfigMap creates or updates the KServe InferenceLogger CA bundle based on the KServe global configuration +func (r *TrustyAIServiceReconciler) createOrUpdateLoggerCaConfigMap(ctx context.Context, namespace string, loggerConfig *LoggerConfig) error { + if loggerConfig.CaBundle == nil { + return fmt.Errorf("loggerConfig.CaBundle is nil") + } + configMapName := *loggerConfig.CaBundle + + data := map[string]string{} + if loggerConfig.CaCertFile != nil { + data[*loggerConfig.CaCertFile] = "" + } + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: namespace, + Annotations: map[string]string{ + "service.beta.openshift.io/inject-cabundle": "true", + "service.beta.openshift.io/inject-cabundle-name": *loggerConfig.CaCertFile, + }, + }, + Data: data, + } + + existingCM := &corev1.ConfigMap{} + err := r.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: namespace}, existingCM) + if err != nil && errors.IsNotFound(err) { + if err := r.Create(ctx, cm); err != nil { + return fmt.Errorf("failed to create ConfigMap %s: %v", configMapName, err) + } + } else if err != nil { + return fmt.Errorf("failed to get ConfigMap %s: %v", configMapName, err) + } else { + existingCM.Data = cm.Data + existingCM.Annotations = cm.Annotations + if err := r.Update(ctx, existingCM); err != nil { + return fmt.Errorf("failed to update ConfigMap %s: %v", configMapName, err) + } + } + + return nil +} diff --git a/controllers/deployment.go b/controllers/deployment.go index 3b8d8f5e7..8dd848ff9 100644 --- a/controllers/deployment.go +++ b/controllers/deployment.go @@ -41,6 +41,9 @@ type DeploymentConfig struct { Version string BatchSize int UseDBTLSCerts bool + ConfigMapExists bool + ConfigMapName string + CertName string } // createDeploymentObject returns a Deployment for the TrustyAI Service instance @@ -61,6 +64,41 @@ func (r *TrustyAIServiceReconciler) createDeploymentObject(ctx context.Context, log.FromContext(ctx).Error(err, "Error getting OAuth image from ConfigMap. Using the default image value of "+defaultOAuthProxyImage) } + // Initialize ConfigMapExists, ConfigMapName, and CertName + var configMapExists bool + var configMapName string + var certName string + + // Get loggerConfig + loggerConfig, err := r.getKServeLoggerConfig(ctx) + if err != nil { + log.FromContext(ctx).Error(err, "Could not read logger configuration") + // Proceed without ConfigMap + } + + if loggerConfig != nil && loggerConfig.CaBundle != nil { + configMapName = *loggerConfig.CaBundle + // Check if ConfigMap exists + configMap := &corev1.ConfigMap{} + err := r.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: instance.Namespace}, configMap) + if err != nil && errors.IsNotFound(err) { + // ConfigMap does not exist + configMapExists = false + } else if err != nil { + // Error occurred + log.FromContext(ctx).Error(err, "Error getting ConfigMap") + return nil, err + } else { + // ConfigMap exists + configMapExists = true + if loggerConfig.CaCertFile != nil { + certName = *loggerConfig.CaCertFile + } + } + } else { + configMapExists = false + } + deploymentConfig := DeploymentConfig{ Instance: instance, ServiceImage: serviceImage, @@ -71,8 +109,10 @@ func (r *TrustyAIServiceReconciler) createDeploymentObject(ctx context.Context, CustomCertificatesBundle: caBunble, Version: Version, BatchSize: batchSize, + ConfigMapExists: configMapExists, + ConfigMapName: configMapName, + CertName: certName, } - if instance.Spec.Storage.IsStorageDatabase() { _, err := r.getSecret(ctx, instance.Name+"-db-tls", instance.Namespace) if err != nil { diff --git a/controllers/inference_services.go b/controllers/inference_services.go index b49a52677..e96d71d0c 100644 --- a/controllers/inference_services.go +++ b/controllers/inference_services.go @@ -3,13 +3,16 @@ package controllers import ( "context" "fmt" + "strings" + kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - "strings" ) const ( @@ -227,7 +230,33 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, } } if kServeServerlessEnabled { - err := r.patchKServe(ctx, instance, infService, namespace, crName, remove) + loggerConfig, err := r.getKServeLoggerConfig(ctx) + if err != nil { + log.FromContext(ctx).Error(err, "Could not read logger configuration") + return false, err + } + + useTLS := false + + if loggerConfig != nil && loggerConfig.CaBundle != nil { + // Check if the ConfigMap exists + caConfigMap := &corev1.ConfigMap{} + err = r.Get(ctx, types.NamespacedName{Name: *loggerConfig.CaBundle, Namespace: instance.Namespace}, caConfigMap) + if err == nil { + useTLS = true + } else if errors.IsNotFound(err) { + useTLS = false + log.FromContext(ctx).Info("CA ConfigMap not found, proceeding without TLS") + } else { + log.FromContext(ctx).Error(err, "Error fetching CA ConfigMap") + return false, err + } + } else { + // If no CaBundle is specified, proceed without TLS + useTLS = false + } + + err = r.patchKServe(ctx, instance, infService, namespace, crName, remove, useTLS) if err != nil { log.FromContext(ctx).Error(err, "could not patch InferenceLogger for KServe deployment") return false, err @@ -238,9 +267,14 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, } // patchKServe adds a TrustyAI service as an InferenceLogger to a KServe InferenceService -func (r *TrustyAIServiceReconciler) patchKServe(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, infService kservev1beta1.InferenceService, namespace string, crName string, remove bool) error { +func (r *TrustyAIServiceReconciler) patchKServe(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, infService kservev1beta1.InferenceService, namespace string, crName string, remove bool, useTLS bool) error { - url := generateNonTLSServiceURL(crName, namespace) + var url string + if useTLS { + url = generateTLSServiceURL(crName, namespace) + } else { + url = generateNonTLSServiceURL(crName, namespace) + } if remove { if infService.Spec.Predictor.Logger == nil || *infService.Spec.Predictor.Logger.URL != url { diff --git a/controllers/templates/service/deployment.tmpl.yaml b/controllers/templates/service/deployment.tmpl.yaml index f1e429f19..c8ab7fd0a 100644 --- a/controllers/templates/service/deployment.tmpl.yaml +++ b/controllers/templates/service/deployment.tmpl.yaml @@ -122,6 +122,10 @@ spec: - name: STORAGE_MIGRATION_CONFIG_FROM_FILENAME value: {{ .Instance.Spec.Data.Filename }} {{ end }} + {{ if .CertName }} + - name: KSERVE_LOGGER_CA_CERT + value: {{ .CertName }} + {{ end }} volumeMounts: - name: {{ .Instance.Name }}-internal readOnly: false @@ -136,6 +140,11 @@ spec: mountPath: /etc/tls/db readOnly: true {{ end }} + {{ if .ConfigMapExists }} + - name: kserve-logger-tls-certs + mountPath: /etc/tls/kserve + readOnly: true + {{ end }} - resources: limits: cpu: 100m @@ -229,3 +238,8 @@ spec: secretName: {{ .Instance.Name }}-db-tls defaultMode: 420 {{ end }} + {{ if .ConfigMapExists }} + - name: kserve-logger-tls-certs + configMap: + name: {{ .ConfigMapName }} + {{ end }} diff --git a/controllers/trustyaiservice_controller.go b/controllers/trustyaiservice_controller.go index edb60f21b..166a202fc 100644 --- a/controllers/trustyaiservice_controller.go +++ b/controllers/trustyaiservice_controller.go @@ -127,6 +127,22 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ return RequeueWithError(err) } + // Retrieve the KServe logger configuration and create the ConfigMap if needed + loggerConfig, err := r.getKServeLoggerConfig(ctx) + if err != nil { + // If the configuration is not present, simply log it + log.FromContext(ctx).Error(err, "Could not read KServe logger configuration") + } + + // If the KServe Logger CA bundle is defined, and correctly specified, create it + if loggerConfig != nil && (loggerConfig.CaBundle != nil || loggerConfig.CaCertFile != nil) { + err := r.createOrUpdateLoggerCaConfigMap(ctx, instance.Namespace, loggerConfig) + if err != nil { + log.FromContext(ctx).Error(err, "Could not create or update KServe logger TLS ConfigMap") + return RequeueWithError(err) + } + } + // CR found, add or update the URL // Call the function to patch environment variables for Deployments that match the label shouldContinue, err := r.handleInferenceServices(ctx, instance, req.Namespace, modelMeshLabelKey, modelMeshLabelValue, payloadProcessorName, req.Name, false) From fc2430b1d8d2d81468ccf804074b2eef1224886a Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Mon, 7 Oct 2024 14:26:55 +0100 Subject: [PATCH 2/2] Update controller-gen and add missing field on --- Makefile | 2 +- api/v1alpha1/zz_generated.deepcopy.go | 1 - controllers/deployment_test.go | 3 ++- controllers/statuses_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 605395f15..43617c2db 100644 --- a/Makefile +++ b/Makefile @@ -182,7 +182,7 @@ ENVTEST ?= $(LOCALBIN)/setup-envtest ## Tool Versions KUSTOMIZE_VERSION ?= v3.8.7 -CONTROLLER_TOOLS_VERSION ?= v0.11.1 +CONTROLLER_TOOLS_VERSION ?= v0.14.0 KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" .PHONY: kustomize diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 1ddafeca0..2c7b65d0a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1,5 +1,4 @@ //go:build !ignore_autogenerated -// +build !ignore_autogenerated /* Copyright 2023. diff --git a/controllers/deployment_test.go b/controllers/deployment_test.go index 0aa0f9250..59515d425 100644 --- a/controllers/deployment_test.go +++ b/controllers/deployment_test.go @@ -7,6 +7,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -274,7 +275,7 @@ func setupAndTestDeploymentInferenceService(instance *trustyaiopendatahubiov1alp return k8sClient.Create(ctx, inferenceService) }, "failed to create deployment") - Expect(reconciler.patchKServe(ctx, instance, *inferenceService, namespace, instance.Name, false)).ToNot(HaveOccurred()) + Expect(reconciler.patchKServe(ctx, instance, *inferenceService, namespace, instance.Name, false, false)).ToNot(HaveOccurred()) deployment := &appsv1.Deployment{} WaitFor(func() error { diff --git a/controllers/statuses_test.go b/controllers/statuses_test.go index f6ad7fe34..c8b24381a 100644 --- a/controllers/statuses_test.go +++ b/controllers/statuses_test.go @@ -323,7 +323,7 @@ var _ = Describe("Status and condition tests", func() { return k8sClient.Create(ctx, inferenceService) }, "failed to create InferenceService") - Expect(reconciler.patchKServe(ctx, instance, *inferenceService, namespace, instance.Name, false)).ToNot(HaveOccurred()) + Expect(reconciler.patchKServe(ctx, instance, *inferenceService, namespace, instance.Name, false, false)).ToNot(HaveOccurred()) WaitFor(func() error { return k8sClient.Create(ctx, instance)