Skip to content

Commit edda4fb

Browse files
committed
Add Database and ObjectStorage Health Checks
- Delays deploying DSPA Manifests until DB and ObjStore Connections are healthy - Introduces status condition of "ComponentDeploymentNotFound" for Required Components with missing Deployments
1 parent ddb30e9 commit edda4fb

File tree

7 files changed

+264
-50
lines changed

7 files changed

+264
-50
lines changed

controllers/config/defaults.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ const (
6969

7070
// DSPA Status Condition Types
7171
const (
72+
DatabaseAvailable = "DatabaseAvailable"
73+
ObjectStoreAvailable = "ObjectStoreAvailable"
7274
APIServerReady = "APIServerReady"
7375
PersistenceAgentReady = "PersistenceAgentReady"
7476
ScheduledWorkflowReady = "ScheduledWorkflowReady"
@@ -81,9 +83,10 @@ const (
8183
// kubectl get output, and in summarizing
8284
// occurrences of causes
8385
const (
84-
MinimumReplicasAvailable = "MinimumReplicasAvailable"
85-
FailingToDeploy = "FailingToDeploy"
86-
Deploying = "Deploying"
86+
MinimumReplicasAvailable = "MinimumReplicasAvailable"
87+
FailingToDeploy = "FailingToDeploy"
88+
Deploying = "Deploying"
89+
ComponentDeploymentNotFound = "ComponentDeploymentNotFound"
8790
)
8891

8992
// Any required Configmap paths can be added here,

controllers/database.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ package controllers
1717

1818
import (
1919
"context"
20+
"database/sql"
21+
b64 "encoding/base64"
22+
"fmt"
23+
24+
_ "github.com/go-sql-driver/mysql"
2025

2126
dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1"
2227
)
@@ -31,6 +36,48 @@ var dbTemplates = []string{
3136
dbSecret,
3237
}
3338

39+
func (r *DSPAReconciler) VerifyMySQLDBConnection(host, port, username, password, dbname string) bool {
40+
connectionString := fmt.Sprintf("%s:%s@tcp(%s:%s)/%s", username, password, host, port, dbname)
41+
db, err := sql.Open("mysql", connectionString)
42+
if err != nil {
43+
return false
44+
}
45+
defer db.Close()
46+
47+
testStatement := "SELECT 1;"
48+
_, err = db.Exec(testStatement)
49+
return err == nil
50+
}
51+
52+
func (r *DSPAReconciler) isDatabaseAccessible(ctx context.Context, dsp *dspav1alpha1.DataSciencePipelinesApplication,
53+
params *DSPAParams) bool {
54+
log := r.Log.WithValues("namespace", dsp.Namespace).WithValues("dspa_name", dsp.Name)
55+
56+
log.Info("Performing Database Health Check")
57+
databaseSpecified := dsp.Spec.Database != nil
58+
usingExternalDB := params.UsingExternalDB(dsp)
59+
usingMariaDB := !databaseSpecified || dsp.Spec.Database.MariaDB != nil
60+
if usingMariaDB || usingExternalDB {
61+
decodePass, _ := b64.StdEncoding.DecodeString(params.DBConnection.Password)
62+
db_connect := r.VerifyMySQLDBConnection(params.DBConnection.Host,
63+
params.DBConnection.Port,
64+
params.DBConnection.Username,
65+
string(decodePass),
66+
params.DBConnection.DBName)
67+
if db_connect {
68+
log.Info("Database Health Check Successful")
69+
} else {
70+
log.Info("Unable to connect to Database")
71+
}
72+
return db_connect
73+
74+
}
75+
76+
log.Info(fmt.Sprintf("Could not connect to Database: Unsupported Type"))
77+
// Only MariaDB and Mysql-Compliant Database supported.
78+
return false
79+
}
80+
3481
func (r *DSPAReconciler) ReconcileDatabase(ctx context.Context, dsp *dspav1alpha1.DataSciencePipelinesApplication,
3582
params *DSPAParams) error {
3683

controllers/dspipeline_controller.go

Lines changed: 72 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22+
2223
"github.com/go-logr/logr"
2324
mf "github.com/manifestival/manifestival"
2425
dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1"
@@ -217,34 +218,42 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
217218
return ctrl.Result{}, err
218219
}
219220

220-
err = r.ReconcileCommon(dspa, params)
221-
if err != nil {
222-
return ctrl.Result{}, err
223-
}
221+
// Get Prereq Status (DB and ObjStore Ready)
222+
dbAvailable := r.isDatabaseAccessible(ctx, dspa, params)
223+
objStoreAvailable := r.isObjectStorageAccessible(ctx, dspa, params)
224+
dspa_prereqs_ready := (dbAvailable && objStoreAvailable)
224225

225-
err = r.ReconcileAPIServer(ctx, dspa, params)
226-
if err != nil {
227-
return ctrl.Result{}, err
228-
}
226+
if dspa_prereqs_ready {
227+
// Manage Common Manifests
228+
err = r.ReconcileCommon(dspa, params)
229+
if err != nil {
230+
return ctrl.Result{}, err
231+
}
229232

230-
err = r.ReconcilePersistenceAgent(dspa, params)
231-
if err != nil {
232-
return ctrl.Result{}, err
233-
}
233+
err = r.ReconcileAPIServer(ctx, dspa, params)
234+
if err != nil {
235+
return ctrl.Result{}, err
236+
}
234237

235-
err = r.ReconcileScheduledWorkflow(dspa, params)
236-
if err != nil {
237-
return ctrl.Result{}, err
238-
}
238+
err = r.ReconcilePersistenceAgent(dspa, params)
239+
if err != nil {
240+
return ctrl.Result{}, err
241+
}
239242

240-
err = r.ReconcileUI(dspa, params)
241-
if err != nil {
242-
return ctrl.Result{}, err
243-
}
243+
err = r.ReconcileScheduledWorkflow(dspa, params)
244+
if err != nil {
245+
return ctrl.Result{}, err
246+
}
244247

245-
err = r.ReconcileMLMD(dspa, params)
246-
if err != nil {
247-
return ctrl.Result{}, err
248+
err = r.ReconcileUI(dspa, params)
249+
if err != nil {
250+
return ctrl.Result{}, err
251+
}
252+
253+
err = r.ReconcileMLMD(dspa, params)
254+
if err != nil {
255+
return ctrl.Result{}, err
256+
}
248257
}
249258

250259
log.Info("Updating CR status")
@@ -255,7 +264,7 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
255264
return ctrl.Result{}, err
256265
}
257266

258-
conditions, err := r.GenerateStatus(ctx, dspa)
267+
conditions, err := r.GenerateStatus(ctx, dspa, params)
259268
if err != nil {
260269
log.Info(err.Error())
261270
return ctrl.Result{}, err
@@ -271,24 +280,20 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
271280

272281
r.PublishMetrics(
273282
dspa,
283+
util.GetConditionByType(config.DatabaseAvailable, conditions),
284+
util.GetConditionByType(config.ObjectStoreAvailable, conditions),
274285
util.GetConditionByType(config.APIServerReady, conditions),
275286
util.GetConditionByType(config.PersistenceAgentReady, conditions),
276287
util.GetConditionByType(config.ScheduledWorkflowReady, conditions),
277288
util.GetConditionByType(config.CrReady, conditions),
278289
)
279-
280290
return ctrl.Result{}, nil
281291
}
282292

283293
// handleReadyCondition evaluates if condition with "name" is in condition of type "conditionType".
284294
// this procedure is valid only for conditions with bool status type, for conditions of non bool type
285295
// results are undefined.
286-
func (r *DSPAReconciler) handleReadyCondition(
287-
ctx context.Context,
288-
dspa *dspav1alpha1.DataSciencePipelinesApplication,
289-
name string,
290-
condition string,
291-
) (metav1.Condition, error) {
296+
func (r *DSPAReconciler) handleReadyCondition(ctx context.Context, dspa *dspav1alpha1.DataSciencePipelinesApplication, name string, condition string) (metav1.Condition, error) {
292297
readyCondition := r.buildCondition(condition, dspa, config.MinimumReplicasAvailable)
293298
deployment := &appsv1.Deployment{}
294299

@@ -297,7 +302,14 @@ func (r *DSPAReconciler) handleReadyCondition(
297302

298303
err := r.Get(ctx, types.NamespacedName{Name: component, Namespace: dspa.Namespace}, deployment)
299304
if err != nil {
300-
return metav1.Condition{}, err
305+
if apierrs.IsNotFound(err) {
306+
readyCondition.Reason = config.ComponentDeploymentNotFound
307+
readyCondition.Status = metav1.ConditionFalse
308+
readyCondition.Message = fmt.Sprintf("Deployment for component \"%s\" is missing", component)
309+
return readyCondition, nil
310+
} else {
311+
return metav1.Condition{}, err
312+
}
301313
}
302314

303315
// First check if deployment is scaled down, if it is, component is deemed not ready
@@ -397,7 +409,16 @@ func (r *DSPAReconciler) handleReadyCondition(
397409

398410
}
399411

400-
func (r *DSPAReconciler) GenerateStatus(ctx context.Context, dspa *dspav1alpha1.DataSciencePipelinesApplication) ([]metav1.Condition, error) {
412+
func (r *DSPAReconciler) GenerateStatus(ctx context.Context, dspa *dspav1alpha1.DataSciencePipelinesApplication, params *DSPAParams) ([]metav1.Condition, error) {
413+
databaseAvailable := r.buildCondition(config.DatabaseAvailable, dspa, config.DatabaseAvailable)
414+
if r.isDatabaseAccessible(ctx, dspa, params) {
415+
databaseAvailable.Status = metav1.ConditionTrue
416+
}
417+
418+
objStoreAvailable := r.buildCondition(config.ObjectStoreAvailable, dspa, config.ObjectStoreAvailable)
419+
if r.isObjectStorageAccessible(ctx, dspa, params) {
420+
objStoreAvailable.Status = metav1.ConditionTrue
421+
}
401422

402423
apiServerReady, err := r.handleReadyCondition(ctx, dspa, "ds-pipeline", config.APIServerReady)
403424
if err != nil {
@@ -411,7 +432,10 @@ func (r *DSPAReconciler) GenerateStatus(ctx context.Context, dspa *dspav1alpha1.
411432
if err != nil {
412433
return []metav1.Condition{}, err
413434
}
435+
414436
var conditions []metav1.Condition
437+
conditions = append(conditions, databaseAvailable)
438+
conditions = append(conditions, objStoreAvailable)
415439
conditions = append(conditions, apiServerReady)
416440
conditions = append(conditions, persistenceAgentReady)
417441
conditions = append(conditions, scheduledWorkflowReady)
@@ -420,7 +444,7 @@ func (r *DSPAReconciler) GenerateStatus(ctx context.Context, dspa *dspav1alpha1.
420444
crReady := r.buildCondition(config.CrReady, dspa, config.MinimumReplicasAvailable)
421445
crReady.Type = config.CrReady
422446

423-
componentConditions := []metav1.Condition{apiServerReady, persistenceAgentReady, scheduledWorkflowReady}
447+
componentConditions := []metav1.Condition{databaseAvailable, objStoreAvailable, apiServerReady, persistenceAgentReady, scheduledWorkflowReady}
424448
allReady := true
425449
failureMessages := ""
426450
for _, c := range componentConditions {
@@ -449,16 +473,24 @@ func (r *DSPAReconciler) GenerateStatus(ctx context.Context, dspa *dspav1alpha1.
449473
}
450474

451475
func (r *DSPAReconciler) PublishMetrics(dspa *dspav1alpha1.DataSciencePipelinesApplication,
452-
apiServerReady, persistenceAgentReady, scheduledWorkflowReady,
476+
dbAvailable, objStoreAvailable, apiServerReady, persistenceAgentReady, scheduledWorkflowReady,
453477
crReady metav1.Condition) {
454478
log := r.Log.WithValues("namespace", dspa.Namespace).WithValues("dspa_name", dspa.Name)
455479
log.Info("Publishing Ready Metrics")
456-
if apiServerReady.Status == metav1.ConditionTrue {
457-
log.Info("APIServer Ready")
458-
APIServerReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1)
480+
if dbAvailable.Status == metav1.ConditionTrue {
481+
log.Info("Database Accessible")
482+
DBAvailableMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1)
483+
} else {
484+
log.Info("Database Not Yet Accessible")
485+
DBAvailableMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0)
486+
}
487+
488+
if objStoreAvailable.Status == metav1.ConditionTrue {
489+
log.Info("Object Store Accessible")
490+
ObjectStoreAvailableMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(1)
459491
} else {
460-
log.Info("APIServer Not Ready")
461-
APIServerReadyMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0)
492+
log.Info("Object Store Not Yet Accessible")
493+
ObjectStoreAvailableMetric.WithLabelValues(dspa.Name, dspa.Namespace).Set(0)
462494
}
463495

464496
if persistenceAgentReady.Status == metav1.ConditionTrue {

controllers/metrics.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,26 @@ import (
2323

2424
// Prometheus metrics gauges
2525
var (
26+
DBAvailableMetric = prometheus.NewGaugeVec(
27+
prometheus.GaugeOpts{
28+
Name: "data_science_pipelines_application_database_available",
29+
Help: "Data Science Pipelines Application - Database Availability Status",
30+
},
31+
[]string{
32+
"dspa_name",
33+
"dspa_namespace",
34+
},
35+
)
36+
ObjectStoreAvailableMetric = prometheus.NewGaugeVec(
37+
prometheus.GaugeOpts{
38+
Name: "data_science_pipelines_application_object_store_available",
39+
Help: "Data Science Pipelines Application - Object Store Availability Status",
40+
},
41+
[]string{
42+
"dspa_name",
43+
"dspa_namespace",
44+
},
45+
)
2646
APIServerReadyMetric = prometheus.NewGaugeVec(
2747
prometheus.GaugeOpts{
2848
Name: "data_science_pipelines_application_apiserver_ready",

controllers/storage.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@ package controllers
1818

1919
import (
2020
"context"
21+
"encoding/base64"
22+
"fmt"
23+
"net/http"
2124

25+
minio "github.com/minio/minio-go/v7"
26+
"github.com/minio/minio-go/v7/pkg/credentials"
2227
dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1"
2328
)
2429

@@ -31,6 +36,69 @@ var storageTemplates = []string{
3136
storageSecret,
3237
}
3338

39+
func joinHostPort(host, port string) string {
40+
if port == "" {
41+
return host
42+
}
43+
return fmt.Sprintf("%s:%s", host, port)
44+
}
45+
46+
func createCredentialProvidersChain(endpoint, accessKey, secretKey string) *credentials.Credentials {
47+
// first try with static api key
48+
if accessKey != "" && secretKey != "" {
49+
return credentials.NewStaticV4(accessKey, secretKey, "")
50+
}
51+
// otherwise use a chained provider: minioEnv -> awsEnv -> IAM
52+
providers := []credentials.Provider{
53+
&credentials.EnvMinio{},
54+
&credentials.EnvAWS{},
55+
&credentials.IAM{
56+
Client: &http.Client{
57+
Transport: http.DefaultTransport,
58+
},
59+
},
60+
}
61+
return credentials.New(&credentials.Chain{Providers: providers})
62+
}
63+
64+
func (r *DSPAReconciler) isObjectStorageAccessible(ctx context.Context, dsp *dspav1alpha1.DataSciencePipelinesApplication,
65+
params *DSPAParams) bool {
66+
log := r.Log.WithValues("namespace", dsp.Namespace).WithValues("dspa_name", dsp.Name)
67+
log.Info("Performing Object Storage Health Check")
68+
69+
endpoint := joinHostPort(params.ObjectStorageConnection.Host, params.ObjectStorageConnection.Port)
70+
accesskey, err := base64.StdEncoding.DecodeString(params.ObjectStorageConnection.AccessKeyID)
71+
if err != nil {
72+
log.Error(err, "Could not decode Object Storage Access Key ID")
73+
return false
74+
}
75+
76+
secretkey, err := base64.StdEncoding.DecodeString(params.ObjectStorageConnection.SecretAccessKey)
77+
if err != nil {
78+
log.Error(err, "Could not decode Object Storage Secret Access Key")
79+
return false
80+
}
81+
82+
cred := createCredentialProvidersChain(endpoint, string(accesskey), string(secretkey))
83+
minioClient, err := minio.New(endpoint, &minio.Options{
84+
Creds: cred,
85+
Secure: params.ObjectStorageConnection.Secure,
86+
})
87+
if err != nil {
88+
log.Info(fmt.Sprintf("Could not connect to object storage endpoint: %s", endpoint))
89+
return false
90+
}
91+
92+
_, err = minioClient.ListBuckets(ctx)
93+
if err != nil {
94+
log.Info(fmt.Sprintf("Could not perform ListBuckets health check on object storage endpoint: %s", endpoint))
95+
return false
96+
}
97+
98+
log.Info("Object Storage Health Check Successful")
99+
return true
100+
}
101+
34102
// ReconcileStorage will set up Storage Connection.
35103
func (r *DSPAReconciler) ReconcileStorage(ctx context.Context, dsp *dspav1alpha1.DataSciencePipelinesApplication,
36104
params *DSPAParams) error {

0 commit comments

Comments
 (0)