Skip to content

Commit 0106b16

Browse files
authored
Perform migrations via Job (#1086)
Originating issues: [IBMPrivateCloud/roadmap#67379](https://github.ibm.com/IBMPrivateCloud/roadmap/issues/67379), [IBMPrivateCloud/roadmap#67424](https://github.ibm.com/IBMPrivateCloud/roadmap/issues/67424) In order to work around limitations imposed by shipping OLM bundles, migration behaviors previously handled via a goroutine in the Authentication controller are now instead handled by a dedicated Job. Additionally, the check performed for whether to use the `idauth` Route for SAML flows has also been broken out into a Job that is run in the event that the `MASTER_PATH` variable has not been set in the `platform-auth-idp` ConfigMap. --------- Signed-off-by: Rob Hundley <[email protected]>
1 parent 0a55ef1 commit 0106b16

16 files changed

+1256
-578
lines changed

api/operator/v1alpha1/authentication_types.go

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package v1alpha1
1818

1919
import (
2020
"context"
21-
"fmt"
2221
"reflect"
2322
"sync"
2423

@@ -220,15 +219,8 @@ const ReasonMigrationsInProgress string = "InProgress"
220219
const ReasonMigrationsDone string = "Done"
221220
const ReasonMigrationFailure string = "Failed"
222221

223-
func NewMigrationCompleteCondition() *metav1.Condition {
224-
return &metav1.Condition{
225-
Type: ConditionMigrated,
226-
Status: metav1.ConditionTrue,
227-
Reason: ReasonMigrationComplete,
228-
Message: MessageMigrationSuccess,
229-
}
230-
}
231-
222+
// Creates a new ConditionMigrationsRunning condition for when the migration Job
223+
// is still running.
232224
func NewMigrationInProgressCondition() *metav1.Condition {
233225
return &metav1.Condition{
234226
Type: ConditionMigrationsRunning,
@@ -238,6 +230,8 @@ func NewMigrationInProgressCondition() *metav1.Condition {
238230
}
239231
}
240232

233+
// Creates a new ConditionMigrationsRunning condition for when the migration Job
234+
// has stopped running.
241235
func NewMigrationFinishedCondition() *metav1.Condition {
242236
return &metav1.Condition{
243237
Type: ConditionMigrationsRunning,
@@ -247,8 +241,33 @@ func NewMigrationFinishedCondition() *metav1.Condition {
247241
}
248242
}
249243

250-
func NewMigrationFailureCondition(name string) *metav1.Condition {
251-
message := fmt.Sprintf("Migration %q failed; review the IM Operator \"migration_worker\" logs for more information", name)
244+
// NewMigrationCompleteCondition creates a new ConditionMigrated condition with
245+
// status metav1.ConditionTrue, for when the migration Job has succeeded.
246+
func NewMigrationCompleteCondition() *metav1.Condition {
247+
return &metav1.Condition{
248+
Type: ConditionMigrated,
249+
Status: metav1.ConditionTrue,
250+
Reason: ReasonMigrationComplete,
251+
Message: MessageMigrationSuccess,
252+
}
253+
}
254+
255+
// NewMigrationYetToBeCompleteCondition creates a new ConditionMigrated
256+
// condition with status metav1.ConditionFalse and reason ReasonMigrationsInProgress, for when the migration Job has yet to run at all; should not be set unless the previous status is
257+
// `metav1.ConditionTrue`.
258+
func NewMigrationYetToBeCompleteCondition() *metav1.Condition {
259+
message := "The \"ibm-im-db-migration\" Job is not yet complete"
260+
return &metav1.Condition{
261+
Type: ConditionMigrated,
262+
Status: metav1.ConditionFalse,
263+
Reason: ReasonMigrationsInProgress,
264+
Message: message,
265+
}
266+
}
267+
268+
// Creates a new ConditionMigrated condition for when the migration Job fails.
269+
func NewMigrationFailureCondition() *metav1.Condition {
270+
message := "Migration failed; review the \"ibm-im-db-migrator\" Job logs for more information"
252271
return &metav1.Condition{
253272
Type: ConditionMigrated,
254273
Status: metav1.ConditionFalse,

bundle/manifests/ibm-iam-operator.clusterserviceversion.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ metadata:
5757
}
5858
},
5959
"config": {
60+
"auditSecret": "",
61+
"auditUrl": "",
6062
"authUniqueHosts": "internal-ip1 internal-ip2 mycluster.icp",
6163
"bootstrapUserId": "kubeadmin",
6264
"claimsMap": "name=\"givenName\" family_name=\"givenName\" given_name=\"givenName\" preferred_username=\"displayName\" display_name=\"displayName\"",
@@ -79,8 +81,6 @@ metadata:
7981
"openshiftPort": 443,
8082
"preferredLogin": "",
8183
"providerIssuerURL": "",
82-
"auditUrl": "",
83-
"auditSecret": "",
8484
"roksEnabled": true,
8585
"roksURL": "https://roks.domain.name:443",
8686
"roksUserPrefix": "changeme",
@@ -154,7 +154,7 @@ metadata:
154154
categories: Security
155155
certified: "false"
156156
containerImage: icr.io/cpopen/ibm-iam-operator:4.14.0
157-
createdAt: "2025-08-18T18:56:37Z"
157+
createdAt: "2025-08-28T15:02:42Z"
158158
description: The IAM operator provides a simple Kubernetes CRD-Based API to manage the lifecycle of IAM services. With this operator, you can simply deploy and upgrade the IAM services
159159
features.operators.openshift.io/disconnected: "true"
160160
features.operators.openshift.io/fips-compliant: "true"
@@ -315,6 +315,8 @@ spec:
315315
value: icr.io/cpopen/cpfs/icp-identity-manager:4.14.0
316316
- name: IM_INITCONTAINER_IMAGE
317317
value: icr.io/cpopen/cpfs/im-initcontainer:4.14.0
318+
- name: IM_DB_MIGRATOR_IMAGE
319+
value: icr.io/cpopen/cpfs/ibm-im-db-migrator:0.0.1
318320
image: icr.io/cpopen/ibm-iam-operator:4.14.0
319321
imagePullPolicy: IfNotPresent
320322
livenessProbe:

bundle/manifests/operator.ibm.com_authentications.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ spec:
172172
properties:
173173
attrMappingFromConfig:
174174
type: boolean
175+
auditSecret:
176+
type: string
177+
auditUrl:
178+
type: string
175179
authUniqueHosts:
176180
type: string
177181
bootstrapUserId:
@@ -228,10 +232,6 @@ spec:
228232
type: string
229233
providerIssuerURL:
230234
type: string
231-
auditUrl:
232-
type: string
233-
auditSecret:
234-
type: string
235235
roksEnabled:
236236
type: boolean
237237
roksURL:

config/manager/overlays/prod/image_env_vars_patch.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
value:
1919
name: IM_INITCONTAINER_IMAGE
2020
value: icr.io/cpopen/cpfs/im-initcontainer:4.14.0
21+
- op: add
22+
path: /spec/template/spec/containers/0/env/-
23+
value:
24+
name: IM_DB_MIGRATOR_IMAGE
25+
value: icr.io/cpopen/cpfs/ibm-im-db-migrator:0.0.1
2126
- op: replace
2227
path: /spec/template/spec/containers/0/imagePullPolicy
2328
value: IfNotPresent

internal/controller/operator/authentication_controller.go

Lines changed: 21 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ import (
2525

2626
certmgr "github.com/IBM/ibm-iam-operator/internal/api/certmanager/v1"
2727
ctrlcommon "github.com/IBM/ibm-iam-operator/internal/controller/common"
28-
dbconn "github.com/IBM/ibm-iam-operator/internal/database/connectors"
29-
"github.com/IBM/ibm-iam-operator/internal/database/migration"
3028
"github.com/IBM/ibm-iam-operator/internal/version"
3129
routev1 "github.com/openshift/api/route/v1"
3230
appsv1 "k8s.io/api/apps/v1"
@@ -36,9 +34,7 @@ import (
3634
corev1 "k8s.io/api/core/v1"
3735
netv1 "k8s.io/api/networking/v1"
3836
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
39-
"k8s.io/apimachinery/pkg/api/meta"
4037
"k8s.io/apimachinery/pkg/api/resource"
41-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
4238
"k8s.io/apimachinery/pkg/runtime"
4339
"k8s.io/apimachinery/pkg/types"
4440
"k8s.io/client-go/discovery"
@@ -79,9 +75,6 @@ var memory550 = resource.NewQuantity(550*1024*1024, resource.BinarySI) // 550M
7975
var memory650 = resource.NewQuantity(650*1024*1024, resource.BinarySI) // 650Mi
8076
var memory1024 = resource.NewQuantity(1024*1024*1024, resource.BinarySI) // 1024Mi
8177

82-
// migrationWait is used when still waiting on a result to be produced by the migration worker
83-
var migrationWait time.Duration = 10 * time.Second
84-
8578
// opreqWait is used for the resources that interact with and originate from OperandRequests
8679
var opreqWait time.Duration = 100 * time.Millisecond
8780

@@ -91,29 +84,6 @@ var defaultLowerWait time.Duration = 5 * time.Millisecond
9184
// finalizerName is the finalizer appended to the Authentication CR
9285
var finalizerName = "authentication.operator.ibm.com"
9386

94-
func (r *AuthenticationReconciler) loopUntilConditionsSet(ctx context.Context, req ctrl.Request, conditions ...*metav1.Condition) {
95-
reqLogger := logf.FromContext(ctx)
96-
conditionsSet := false
97-
for !conditionsSet {
98-
authCR := &operatorv1alpha1.Authentication{}
99-
if result, err := r.getLatestAuthentication(ctx, req, authCR); subreconciler.ShouldHaltOrRequeue(result, err) {
100-
reqLogger.Info("Failed to retrieve Authentication CR for status update; retrying")
101-
continue
102-
}
103-
for _, condition := range conditions {
104-
if condition == nil {
105-
continue
106-
}
107-
meta.SetStatusCondition(&authCR.Status.Conditions, *condition)
108-
}
109-
if err := r.Client.Status().Update(ctx, authCR); err != nil {
110-
reqLogger.Error(err, "Failed to set conditions on Authentication; retrying", "conditions", conditions)
111-
continue
112-
}
113-
conditionsSet = true
114-
}
115-
}
116-
11787
func (r *AuthenticationReconciler) getLatestAuthentication(ctx context.Context, req ctrl.Request, authentication *operatorv1alpha1.Authentication) (result *ctrl.Result, err error) {
11888
reqLogger := logf.FromContext(ctx)
11989
if err := r.Get(ctx, req.NamespacedName, authentication); err != nil {
@@ -174,10 +144,7 @@ type AuthenticationReconciler struct {
174144
DiscoveryClient discovery.DiscoveryClient
175145
Mutex sync.Mutex
176146
clusterType ctrlcommon.ClusterType
177-
dbSetupChan chan *migration.Result
178147
needsRollout bool
179-
GetPostgresDB func(client.Client, context.Context, ctrl.Request) (dbconn.DBConn, error)
180-
GetMongoDB func(client.Client, context.Context, ctrl.Request) (dbconn.DBConn, error)
181148
}
182149

183150
// Reconcile is part of the main kubernetes reconciliation loop which aims to
@@ -284,19 +251,6 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
284251
return subreconciler.Evaluate(subResult, err)
285252
}
286253

287-
// perform any migrations that may be needed before Deployments run
288-
if subResult, err := r.handleMigrations(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
289-
return subreconciler.Evaluate(subResult, err)
290-
}
291-
292-
if subResult, err := r.setMigrationCompleteStatus(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
293-
return subreconciler.Evaluate(subResult, err)
294-
}
295-
296-
if result, err := r.handleMongoDBCleanup(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(result, err) {
297-
return subreconciler.Evaluate(result, err)
298-
}
299-
300254
reqLogger.Info("Creating ibm-iam-operand-restricted serviceaccount")
301255
currentSA := &corev1.ServiceAccount{}
302256
err = r.createSA(instance, currentSA, &needToRequeue)
@@ -307,6 +261,14 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
307261
r.createRole(instance)
308262
r.createRoleBinding(instance)
309263

264+
if subResult, err := r.ensureMigrationJobRuns(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
265+
return subreconciler.Evaluate(subResult, err)
266+
}
267+
268+
if subResult, err := r.checkSAMLPresence(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
269+
return subreconciler.Evaluate(subResult, err)
270+
}
271+
310272
// Check if this Certificate already exists and create it if it doesn't
311273
if subResult, err := r.handleCertificates(ctx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
312274
return subreconciler.Evaluate(subResult, err)
@@ -319,6 +281,11 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
319281
return
320282
}
321283

284+
// Check if this Job already exists and create it if it doesn't
285+
if subResult, err := r.ensureOIDCClientRegistrationJobRuns(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
286+
return subreconciler.Evaluate(subResult, err)
287+
}
288+
322289
// Check if this Secret already exists and create it if it doesn't
323290
if subResult, err = r.handleSecrets(ctx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
324291
return subreconciler.Evaluate(subResult, err)
@@ -332,12 +299,6 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
332299
return subreconciler.Evaluate(subResult, err)
333300
}
334301

335-
// Check if this Job already exists and create it if it doesn't
336-
currentJob := &batchv1.Job{}
337-
err = r.handleJob(instance, currentJob, &needToRequeue)
338-
if err != nil {
339-
return
340-
}
341302
// create clusterrole and clusterrolebinding
342303
if subResult, err := r.handleClusterRoles(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
343304
return subreconciler.Evaluate(subResult, err)
@@ -351,6 +312,10 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
351312
// updates redirecturi annotations to serviceaccount
352313
r.handleServiceAccount(instance, &needToRequeue)
353314

315+
if subResult, err = r.ensureMigrationJobSucceeded(ctx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
316+
return subreconciler.Evaluate(subResult, err)
317+
}
318+
354319
if subResult, err := r.handleDeployments(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(subResult, err) {
355320
return subreconciler.Evaluate(subResult, err)
356321
}
@@ -371,6 +336,10 @@ func (r *AuthenticationReconciler) Reconcile(ctx context.Context, req ctrl.Reque
371336
return subreconciler.Evaluate(subResult, err)
372337
}
373338

339+
if result, err := r.handleMongoDBCleanup(reconcileCtx, req); subreconciler.ShouldHaltOrRequeue(result, err) {
340+
return subreconciler.Evaluate(result, err)
341+
}
342+
374343
return subreconciler.Evaluate(subreconciler.DoNotRequeue())
375344
}
376345

@@ -448,12 +417,6 @@ func (r *AuthenticationReconciler) SetupWithManager(mgr ctrl.Manager) error {
448417
return o.GetLabels()[ctrlcommon.ManagerVersionLabel] == version.Version
449418
})
450419

451-
r.GetPostgresDB = func(c client.Client, ctx context.Context, req ctrl.Request) (d dbconn.DBConn, err error) {
452-
return GetPostgresDB(c, ctx, req)
453-
}
454-
r.GetMongoDB = func(c client.Client, ctx context.Context, req ctrl.Request) (d dbconn.DBConn, err error) {
455-
return GetMongoDB(c, ctx, req)
456-
}
457420
authCtrl.Watches(&operatorv1alpha1.Authentication{}, &handler.EnqueueRequestForObject{}, builder.WithPredicates(bootstrappedPred))
458421
return authCtrl.Named("controller_authentication").
459422
Complete(r)

0 commit comments

Comments
 (0)