44 "context"
55 "crypto/sha256"
66 "encoding/hex"
7+ "fmt"
78 "regexp"
89 "sort"
910 "strings"
@@ -12,6 +13,7 @@ import (
1213 "github.com/trustyai-explainability/trustyai-service-operator/controllers/constants"
1314 "k8s.io/apimachinery/pkg/api/errors"
1415 "k8s.io/apimachinery/pkg/types"
16+ "sigs.k8s.io/controller-runtime/pkg/client"
1517 "sigs.k8s.io/controller-runtime/pkg/log"
1618
1719 corev1 "k8s.io/api/core/v1"
@@ -752,3 +754,256 @@ func (r *EvalHubReconciler) createJobsServiceAccount(ctx context.Context, instan
752754
753755 return nil
754756}
757+
758+ // reconcileTenantNamespaces discovers namespaces with the tenant annotation and
759+ // provisions per-tenant RBAC (SA + RoleBindings) so the API SA can create jobs
760+ // in tenant namespaces. It also cleans up resources in namespaces that lost the
761+ // annotation.
762+ func (r * EvalHubReconciler ) reconcileTenantNamespaces (ctx context.Context , instance * evalhubv1alpha1.EvalHub ) error {
763+ log := log .FromContext (ctx )
764+
765+ // List all namespaces
766+ nsList := & corev1.NamespaceList {}
767+ if err := r .List (ctx , nsList ); err != nil {
768+ return fmt .Errorf ("listing namespaces: %w" , err )
769+ }
770+
771+ // Build set of annotated tenant namespaces (excluding control-plane)
772+ tenantNS := make (map [string ]bool )
773+ for _ , ns := range nsList .Items {
774+ if ns .Name == instance .Namespace {
775+ continue
776+ }
777+ if _ , ok := ns .Annotations [tenantAnnotation ]; ok {
778+ tenantNS [ns .Name ] = true
779+ }
780+ }
781+
782+ // Reconcile each tenant namespace
783+ for ns := range tenantNS {
784+ if err := r .reconcileTenantNamespace (ctx , instance , ns ); err != nil {
785+ log .Error (err , "Failed to reconcile tenant namespace" , "namespace" , ns )
786+ return fmt .Errorf ("reconciling tenant namespace %s: %w" , ns , err )
787+ }
788+ }
789+
790+ // Cleanup: find managed resources in namespaces that no longer have the annotation
791+ managedLabel := client.MatchingLabels {tenantLabel : instance .Name }
792+
793+ // Cleanup stale ServiceAccounts
794+ saList := & corev1.ServiceAccountList {}
795+ if err := r .List (ctx , saList , managedLabel ); err != nil {
796+ return fmt .Errorf ("listing managed service accounts: %w" , err )
797+ }
798+ for i := range saList .Items {
799+ sa := & saList .Items [i ]
800+ if ! tenantNS [sa .Namespace ] && sa .Namespace != instance .Namespace {
801+ log .Info ("Cleaning up stale tenant SA" , "namespace" , sa .Namespace , "name" , sa .Name )
802+ if err := r .Delete (ctx , sa ); err != nil && ! errors .IsNotFound (err ) {
803+ return fmt .Errorf ("deleting stale SA %s/%s: %w" , sa .Namespace , sa .Name , err )
804+ }
805+ }
806+ }
807+
808+ // Cleanup stale RoleBindings
809+ rbList := & rbacv1.RoleBindingList {}
810+ if err := r .List (ctx , rbList , managedLabel ); err != nil {
811+ return fmt .Errorf ("listing managed role bindings: %w" , err )
812+ }
813+ for i := range rbList .Items {
814+ rb := & rbList .Items [i ]
815+ if ! tenantNS [rb .Namespace ] && rb .Namespace != instance .Namespace {
816+ log .Info ("Cleaning up stale tenant RoleBinding" , "namespace" , rb .Namespace , "name" , rb .Name )
817+ if err := r .Delete (ctx , rb ); err != nil && ! errors .IsNotFound (err ) {
818+ return fmt .Errorf ("deleting stale RoleBinding %s/%s: %w" , rb .Namespace , rb .Name , err )
819+ }
820+ }
821+ }
822+
823+ return nil
824+ }
825+
826+ // reconcileTenantNamespace creates per-tenant RBAC resources in the given namespace.
827+ // All resources are labelled with tenantLabel for cleanup (no owner refs, since
828+ // cross-namespace owner references are forbidden).
829+ func (r * EvalHubReconciler ) reconcileTenantNamespace (ctx context.Context , instance * evalhubv1alpha1.EvalHub , namespace string ) error {
830+ log := log .FromContext (ctx )
831+ log .Info ("Reconciling tenant namespace RBAC" , "namespace" , namespace )
832+
833+ apiSAName := generateServiceAccountName (instance )
834+ jobsSAName := generateJobsServiceAccountName (instance )
835+
836+ managedLabels := map [string ]string {
837+ tenantLabel : instance .Name ,
838+ "app" : "eval-hub" ,
839+ "app.kubernetes.io/instance" : instance .Name ,
840+ "app.kubernetes.io/part-of" : "eval-hub" ,
841+ }
842+
843+ managedAnnotations := map [string ]string {
844+ tenantOwnerAnnotation : "eval-hub" ,
845+ }
846+
847+ // 1. Create jobs SA in the tenant namespace
848+ if err := r .ensureTenantServiceAccount (ctx , jobsSAName , namespace , managedLabels , managedAnnotations ); err != nil {
849+ return err
850+ }
851+
852+ // 2. RoleBinding: API SA → jobs-writer ClusterRole (create/delete jobs in tenant ns)
853+ if err := r .ensureTenantRoleBinding (ctx , instance .Name + "-tenant-jobs-writer" , namespace , managedLabels , managedAnnotations ,
854+ []rbacv1.Subject {{
855+ Kind : "ServiceAccount" ,
856+ Name : apiSAName ,
857+ Namespace : instance .Namespace ,
858+ }},
859+ rbacv1.RoleRef {Kind : "ClusterRole" , Name : jobsWriterClusterRoleName , APIGroup : rbacv1 .GroupName },
860+ ); err != nil {
861+ return err
862+ }
863+
864+ // 3. RoleBinding: API SA → job-config ClusterRole (create/get/list configmaps in tenant ns)
865+ if err := r .ensureTenantRoleBinding (ctx , instance .Name + "-tenant-job-config" , namespace , managedLabels , managedAnnotations ,
866+ []rbacv1.Subject {{
867+ Kind : "ServiceAccount" ,
868+ Name : apiSAName ,
869+ Namespace : instance .Namespace ,
870+ }},
871+ rbacv1.RoleRef {Kind : "ClusterRole" , Name : jobConfigClusterRoleName , APIGroup : rbacv1 .GroupName },
872+ ); err != nil {
873+ return err
874+ }
875+
876+ // 4. RoleBinding: API SA + Jobs SA (tenant) → mlflow-access ClusterRole
877+ if err := r .ensureTenantRoleBinding (ctx , instance .Name + "-tenant-mlflow" , namespace , managedLabels , managedAnnotations ,
878+ []rbacv1.Subject {
879+ {
880+ Kind : "ServiceAccount" ,
881+ Name : apiSAName ,
882+ Namespace : instance .Namespace ,
883+ },
884+ {
885+ Kind : "ServiceAccount" ,
886+ Name : jobsSAName ,
887+ Namespace : namespace ,
888+ },
889+ },
890+ rbacv1.RoleRef {Kind : "ClusterRole" , Name : mlflowAccessClusterRoleName , APIGroup : rbacv1 .GroupName },
891+ ); err != nil {
892+ return err
893+ }
894+
895+ return nil
896+ }
897+
898+ // ensureTenantServiceAccount creates a ServiceAccount in the given namespace if it
899+ // does not exist. No owner reference is set (cross-namespace not allowed).
900+ func (r * EvalHubReconciler ) ensureTenantServiceAccount (ctx context.Context , name , namespace string , labels map [string ]string , annotations map [string ]string ) error {
901+ sa := & corev1.ServiceAccount {}
902+ err := r .Get (ctx , types.NamespacedName {Name : name , Namespace : namespace }, sa )
903+ if err == nil {
904+ return nil // already exists
905+ }
906+ if ! errors .IsNotFound (err ) {
907+ return err
908+ }
909+
910+ log .FromContext (ctx ).Info ("Creating tenant SA" , "namespace" , namespace , "name" , name )
911+ sa = & corev1.ServiceAccount {
912+ ObjectMeta : metav1.ObjectMeta {
913+ Name : name ,
914+ Namespace : namespace ,
915+ Labels : labels ,
916+ Annotations : annotations ,
917+ },
918+ }
919+ return r .Create (ctx , sa )
920+ }
921+
922+ // ensureTenantRoleBinding creates or updates a RoleBinding in the given namespace.
923+ // No owner reference is set (cross-namespace not allowed).
924+ func (r * EvalHubReconciler ) ensureTenantRoleBinding (ctx context.Context , name , namespace string , labels map [string ]string , annotations map [string ]string , subjects []rbacv1.Subject , roleRef rbacv1.RoleRef ) error {
925+ log := log .FromContext (ctx )
926+
927+ desired := & rbacv1.RoleBinding {
928+ ObjectMeta : metav1.ObjectMeta {
929+ Name : name ,
930+ Namespace : namespace ,
931+ Labels : labels ,
932+ Annotations : annotations ,
933+ },
934+ Subjects : subjects ,
935+ RoleRef : roleRef ,
936+ }
937+
938+ found := & rbacv1.RoleBinding {}
939+ err := r .Get (ctx , types.NamespacedName {Name : name , Namespace : namespace }, found )
940+ if err != nil && errors .IsNotFound (err ) {
941+ log .Info ("Creating tenant RoleBinding" , "namespace" , namespace , "name" , name )
942+ return r .Create (ctx , desired )
943+ } else if err != nil {
944+ return err
945+ }
946+
947+ // Update if subjects or roleRef changed
948+ subjectsEqual := equalRoleBindingSubjects (found .Subjects , desired .Subjects )
949+ roleRefEqual := equalRoleBindingRoleRef (found .RoleRef , desired .RoleRef )
950+
951+ if ! subjectsEqual || ! roleRefEqual {
952+ if roleRefEqual && ! subjectsEqual {
953+ found .Subjects = desired .Subjects
954+ log .Info ("Updating tenant RoleBinding subjects" , "name" , name )
955+ return r .Update (ctx , found )
956+ }
957+ // RoleRef is immutable; delete and recreate
958+ log .Info ("RoleRef differs, recreating tenant RoleBinding" , "name" , name )
959+ if err := r .Delete (ctx , found ); err != nil {
960+ return err
961+ }
962+ return r .Create (ctx , desired )
963+ }
964+
965+ return nil
966+ }
967+
968+ // cleanupTenantResources removes all tenant-namespace resources managed by this
969+ // EvalHub instance (identified by tenantLabel). Called during EvalHub deletion.
970+ func (r * EvalHubReconciler ) cleanupTenantResources (ctx context.Context , instance * evalhubv1alpha1.EvalHub ) error {
971+ log := log .FromContext (ctx )
972+ log .Info ("Cleaning up tenant resources" , "instance" , instance .Name )
973+
974+ managedLabel := client.MatchingLabels {tenantLabel : instance .Name }
975+
976+ // Delete managed RoleBindings across all namespaces
977+ rbList := & rbacv1.RoleBindingList {}
978+ if err := r .List (ctx , rbList , managedLabel ); err != nil {
979+ return fmt .Errorf ("listing managed RoleBindings for cleanup: %w" , err )
980+ }
981+ for i := range rbList .Items {
982+ rb := & rbList .Items [i ]
983+ if rb .Namespace == instance .Namespace {
984+ continue // control-plane resources cleaned by owner-ref GC
985+ }
986+ log .Info ("Deleting tenant RoleBinding" , "namespace" , rb .Namespace , "name" , rb .Name )
987+ if err := r .Delete (ctx , rb ); err != nil && ! errors .IsNotFound (err ) {
988+ return fmt .Errorf ("deleting tenant RoleBinding %s/%s: %w" , rb .Namespace , rb .Name , err )
989+ }
990+ }
991+
992+ // Delete managed ServiceAccounts across all namespaces
993+ saList := & corev1.ServiceAccountList {}
994+ if err := r .List (ctx , saList , managedLabel ); err != nil {
995+ return fmt .Errorf ("listing managed SAs for cleanup: %w" , err )
996+ }
997+ for i := range saList .Items {
998+ sa := & saList .Items [i ]
999+ if sa .Namespace == instance .Namespace {
1000+ continue
1001+ }
1002+ log .Info ("Deleting tenant SA" , "namespace" , sa .Namespace , "name" , sa .Name )
1003+ if err := r .Delete (ctx , sa ); err != nil && ! errors .IsNotFound (err ) {
1004+ return fmt .Errorf ("deleting tenant SA %s/%s: %w" , sa .Namespace , sa .Name , err )
1005+ }
1006+ }
1007+
1008+ return nil
1009+ }
0 commit comments