Skip to content

Commit 5a21456

Browse files
authored
Merge pull request #13 from ucloud/feature/restore
Feature/restore
2 parents e77e6f9 + c18e005 commit 5a21456

File tree

17 files changed

+383
-96
lines changed

17 files changed

+383
-96
lines changed

hack/docker/redis-tools/redis-tools.sh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ REDIS_BUCKET=${REDIS_BUCKET:-}
2929
REDIS_FOLDER=${REDIS_FOLDER:-}
3030
REDIS_SNAPSHOT=${REDIS_SNAPSHOT:-}
3131
REDIS_DATA_DIR=${REDIS_DATA_DIR:-/data}
32+
REDIS_RESTORE_SUCCEEDED=${REDIS_RESTORE_SUCCEEDED:-0}
3233
OSM_CONFIG_FILE=/etc/osm/config
3334
ENABLE_ANALYTICS=${ENABLE_ANALYTICS:-false}
3435

@@ -87,10 +88,10 @@ fi
8788

8889
# Wait for redis to start
8990
# ref: http://unix.stackexchange.com/a/5279
90-
while ! nc -q 1 $REDIS_HOST $REDIS_PORT </dev/null; do
91-
echo "Waiting... database is not ready yet"
92-
sleep 5
93-
done
91+
#while ! nc -q 1 "${REDIS_HOST}" "${REDIS_PORT}" </dev/null; do
92+
# echo "Waiting... database is not ready yet"
93+
# sleep 5
94+
#done
9495

9596
# cleanup data dump dir
9697
mkdir -p "$REDIS_DATA_DIR"
@@ -100,15 +101,21 @@ rm -rf *
100101
case "$op" in
101102
backup)
102103
echo "Dumping database......"
103-
redis-cli --rdb dump.rdb -h ${REDIS_HOST} -a "${REDIS_PASSWORD}"
104-
redis-cli -h ${REDIS_HOST} -a "${REDIS_PASSWORD}" CLUSTER NODES | grep myself > nodes.conf
104+
redis-cli --rdb dump.rdb -h "${REDIS_HOST}" -a "${REDIS_PASSWORD}"
105+
redis-cli -h "${REDIS_HOST}" -a "${REDIS_PASSWORD}" CLUSTER NODES | grep myself > nodes.conf
105106
echo "Uploading dump file to the backend......."
106107
osm push --enable-analytics="$ENABLE_ANALYTICS" --osmconfig="$OSM_CONFIG_FILE" -c "$REDIS_BUCKET" "$REDIS_DATA_DIR" "$REDIS_FOLDER/$REDIS_SNAPSHOT"
107108

108109
echo "Backup successful"
109110
;;
110111
restore)
111112
echo "Pulling backup file from the backend"
113+
if [ "${REDIS_RESTORE_SUCCEEDED}" == "1" ];then
114+
echo "Has been restored successfully"
115+
exit 0
116+
fi
117+
index=$(echo "${POD_NAME}" | awk -F- '{print $NF}')
118+
REDIS_SNAPSHOT=${REDIS_SNAPSHOT}-${index}
112119
osm pull --enable-analytics="$ENABLE_ANALYTICS" --osmconfig="$OSM_CONFIG_FILE" -c "$REDIS_BUCKET" "$REDIS_FOLDER/$REDIS_SNAPSHOT" "$REDIS_DATA_DIR"
113120

114121
echo "Recovery successful"

pkg/apis/redis/v1alpha1/constants.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,7 @@ const (
7474

7575
PrometheusExporterPortNumber = 9100
7676
PrometheusExporterTelemetryPath = "/metrics"
77+
78+
BackupDumpDir = "/data"
79+
UtilVolumeName = "util-volume"
7780
)

pkg/apis/redis/v1alpha1/default.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ func (in *DistributedRedisCluster) Validate() {
2121
in.Spec.MasterSize = minMasterSize
2222
}
2323

24-
if in.Spec.ClusterReplicas < minClusterReplicas {
25-
in.Spec.ClusterReplicas = minClusterReplicas
26-
}
24+
//if in.Spec.ClusterReplicas < minClusterReplicas {
25+
// in.Spec.ClusterReplicas = minClusterReplicas
26+
//}
2727

2828
if in.Spec.Image == "" {
2929
in.Spec.Image = defaultRedisImage
@@ -98,7 +98,7 @@ func (in *RedisClusterBackup) Validate() error {
9898

9999
func (in *RedisClusterBackup) Location() (string, error) {
100100
spec := in.Spec.Backend
101-
timePrefix := in.Status.StartTime.Format("200601020304")
101+
timePrefix := in.Status.StartTime.Format("20060102150405")
102102
if spec.S3 != nil {
103103
return filepath.Join(spec.S3.Prefix, DatabaseNamePrefix, in.Namespace, in.Spec.RedisClusterName, timePrefix), nil
104104
} else if spec.GCS != nil {
@@ -118,5 +118,5 @@ func (in *RedisClusterBackup) OSMSecretName() string {
118118
}
119119

120120
// JobName returns the job name derived from this backup's name by adding a
// fixed prefix. This change corrects the prefix spelling from "redisbacup-"
// to "redisbackup-".
func (in *RedisClusterBackup) JobName() string {
121-
return fmt.Sprintf("redisbacup-%v", in.Name)
121+
return fmt.Sprintf("redisbackup-%v", in.Name)
122122
}

pkg/apis/redis/v1alpha1/distributedrediscluster_types.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type DistributedRedisClusterSpec struct {
3030
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
3131
PasswordSecret *corev1.LocalObjectReference `json:"rootPasswordSecret,omitempty"`
3232
Monitor *AgentSpec `json:"monitor,omitempty"`
33+
Init *InitSpec `json:"init,omitempty"`
3334
}
3435

3536
type AgentSpec struct {
@@ -78,6 +79,17 @@ type PrometheusSpec struct {
7879
//Annotations map[string]string `json:"annotations,omitempty"`
7980
}
8081

82+
// InitSpec describes how a cluster is initialized. The only option it carries
// is a backup to restore from.
type InitSpec struct {
83+
// BackupSource identifies the backup to restore. The controller's validate
// step rejects an Init section whose BackupSource is nil.
BackupSource *BackupSourceSpec `json:"backupSource,omitempty"`
84+
}
85+
86+
// BackupSourceSpec references the RedisClusterBackup object that a cluster is
// restored from; the controller looks the backup up by Namespace and Name.
type BackupSourceSpec struct {
87+
// Namespace of the referenced backup object.
Namespace string `json:"namespace"`
88+
// Name of the referenced backup object.
Name string `json:"name"`
89+
// Args are extra arguments passed to the restore job.
90+
Args []string `json:"args,omitempty"`
91+
}
92+
8193
// RedisStorage defines the structure used to store the Redis Data
8294
type RedisStorage struct {
8395
Size resource.Quantity `json:"size"`
@@ -96,6 +108,9 @@ type DistributedRedisClusterStatus struct {
96108
Reason string `json:"reason,omitempty"`
97109
NumberOfMaster int32 `json:"numberOfMaster,omitempty"`
98110
Nodes []RedisClusterNode `json:"nodes"`
111+
// The number of restores that have reached the Succeeded phase.
112+
// +optional
113+
RestoreSucceeded int32 `json:"restoreSucceeded,omitempty"`
99114
}
100115

101116
// RedisClusterNode represent a RedisCluster Node

pkg/apis/redis/v1alpha1/redisclusterbackup_types.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,13 @@ const (
151151
// +k8s:openapi-gen=true
152152
type RedisClusterBackupStatus struct {
153153
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
154-
StartTime *metav1.Time `json:"startTime,omitempty"`
155-
CompletionTime *metav1.Time `json:"completionTime,omitempty"`
156-
Phase BackupPhase `json:"phase,omitempty"`
157-
Reason string `json:"reason,omitempty"`
154+
StartTime *metav1.Time `json:"startTime,omitempty"`
155+
CompletionTime *metav1.Time `json:"completionTime,omitempty"`
156+
Phase BackupPhase `json:"phase,omitempty"`
157+
Reason string `json:"reason,omitempty"`
158+
// MasterSize, ClusterReplicas and ClusterImage are read by the cluster
// controller's validate step, which copies them into the spec of a cluster
// that is being restored from this backup.
// NOTE(review): the code that populates these fields is not visible in this
// diff - confirm they are set before Phase becomes Succeeded.
MasterSize int32 `json:"masterSize,omitempty"`
159+
ClusterReplicas int32 `json:"clusterReplicas,omitempty"`
160+
ClusterImage string `json:"clusterImage,omitempty"`
158161
}
159162

160163
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

pkg/controller/distributedrediscluster/distributedrediscluster_controller.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func Add(mgr manager.Manager) error {
4242
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
4343
reconiler := &ReconcileDistributedRedisCluster{client: mgr.GetClient(), scheme: mgr.GetScheme()}
4444
reconiler.statefulSetController = k8sutil.NewStatefulSetController(reconiler.client)
45-
reconiler.clusterStatusController = k8sutil.NewCRControl(reconiler.client)
45+
reconiler.crController = k8sutil.NewCRControl(reconiler.client)
4646
reconiler.ensurer = clustermanger.NewEnsureResource(reconiler.client, log)
4747
reconiler.checker = clustermanger.NewCheck(reconiler.client)
4848
return reconiler
@@ -96,12 +96,12 @@ var _ reconcile.Reconciler = &ReconcileDistributedRedisCluster{}
9696
type ReconcileDistributedRedisCluster struct {
9797
// This client, initialized using mgr.Client() above, is a split client
9898
// that reads objects from the cache and writes to the apiserver
99-
client client.Client
100-
scheme *runtime.Scheme
101-
ensurer clustermanger.IEnsureResource
102-
checker clustermanger.ICheck
103-
statefulSetController k8sutil.IStatefulSetControl
104-
clusterStatusController k8sutil.ICustomResource
99+
client client.Client
100+
scheme *runtime.Scheme
101+
// ensurer creates the resources a cluster needs (config map, statefulset,
// headless service, OSM secret).
ensurer clustermanger.IEnsureResource
102+
// checker verifies cluster state, e.g. the number of redis nodes.
checker clustermanger.ICheck
103+
statefulSetController k8sutil.IStatefulSetControl
104+
// crController (renamed from clusterStatusController) updates the cluster
// custom resource and its status, and fetches the RedisClusterBackup used
// for restore.
crController k8sutil.ICustomResource
105105
}
106106

107107
// Reconcile reads that state of the cluster for a DistributedRedisCluster object and makes changes based on the state read
@@ -129,6 +129,11 @@ func (r *ReconcileDistributedRedisCluster) Reconcile(request reconcile.Request)
129129

130130
err = r.waitPodReady(instance)
131131
if err != nil {
132+
switch GetType(err) {
133+
case StopRetry:
134+
reqLogger.Info("invalid", "err", err)
135+
return reconcile.Result{}, nil
136+
}
132137
reqLogger.WithValues("err", err).Info("requeue")
133138
new := instance.Status.DeepCopy()
134139
SetClusterScaling(new, err.Error())
@@ -171,6 +176,19 @@ func (r *ReconcileDistributedRedisCluster) Reconcile(request reconcile.Request)
171176
return reconcile.Result{}, err
172177
}
173178

179+
// update cr and wait for the next Reconcile loop
180+
if instance.Spec.Init != nil && instance.Status.RestoreSucceeded <= 0 {
181+
reqLogger.Info("update restore redis cluster cr")
182+
instance.Status.RestoreSucceeded = 1
183+
if err := r.crController.UpdateCRStatus(instance); err != nil {
184+
return reconcile.Result{}, err
185+
}
186+
if err := r.crController.UpdateCR(instance); err != nil {
187+
return reconcile.Result{}, err
188+
}
189+
return reconcile.Result{}, nil
190+
}
191+
174192
status := buildClusterStatus(clusterInfos, redisClusterPods.Items, &instance.Status)
175193
reqLogger.V(4).Info("buildClusterStatus", "status", status)
176194
r.updateClusterIfNeed(instance, status)
@@ -191,5 +209,6 @@ func (r *ReconcileDistributedRedisCluster) Reconcile(request reconcile.Request)
191209
newStatus := buildClusterStatus(newClusterInfos, redisClusterPods.Items, &instance.Status)
192210
SetClusterOK(newStatus, "OK")
193211
r.updateClusterIfNeed(instance, newStatus)
194-
return reconcile.Result{RequeueAfter: requeueEnsure}, nil
212+
//return reconcile.Result{RequeueAfter: requeueEnsure}, nil
213+
return reconcile.Result{}, nil
195214
}

pkg/controller/distributedrediscluster/errors.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ const (
2020
Redis
2121
// Cluster
2222
Cluster
23+
// StopRetry marks an error that must not be retried; Reconcile logs it and returns without requeueing
24+
StopRetry
2325
)
2426

2527
type customError struct {
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package distributedrediscluster
2+
3+
import (
4+
"github.com/go-logr/logr"
5+
6+
redisv1alpha1 "github.com/ucloud/redis-cluster-operator/pkg/apis/redis/v1alpha1"
7+
)
8+
9+
// initFromBackup is currently a placeholder: it ignores reqLogger and cluster
// and always returns nil.
func (r *ReconcileDistributedRedisCluster) initFromBackup(reqLogger logr.Logger, cluster *redisv1alpha1.DistributedRedisCluster) error {
10+
return nil
11+
}

pkg/controller/distributedrediscluster/status.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ func SetClusterScaling(status *redisv1alpha1.DistributedRedisClusterStatus, reas
3232

3333
func buildClusterStatus(clusterInfos *redisutil.ClusterInfos, pods []corev1.Pod, oldStatus *redisv1alpha1.DistributedRedisClusterStatus) *redisv1alpha1.DistributedRedisClusterStatus {
3434
status := &redisv1alpha1.DistributedRedisClusterStatus{
35-
Status: oldStatus.Status,
36-
Reason: oldStatus.Reason,
35+
Status: oldStatus.Status,
36+
Reason: oldStatus.Reason,
37+
RestoreSucceeded: oldStatus.RestoreSucceeded,
3738
}
3839

3940
nbMaster := int32(0)
@@ -89,7 +90,7 @@ func (r *ReconcileDistributedRedisCluster) updateClusterIfNeed(cluster *redisv1a
8990
log.WithValues("namespace", cluster.Namespace, "name", cluster.Name).
9091
V(3).Info("status changed")
9192
cluster.Status = *newStatus
92-
r.clusterStatusController.UpdateCRStatus(cluster)
93+
r.crController.UpdateCRStatus(cluster)
9394
}
9495
}
9596

@@ -110,6 +111,10 @@ func compareStatus(old, new *redisv1alpha1.DistributedRedisClusterStatus) bool {
110111
return true
111112
}
112113

114+
if compareInts("restoreSucceeded", old.RestoreSucceeded, new.RestoreSucceeded) {
115+
return true
116+
}
117+
113118
for _, nodeA := range old.Nodes {
114119
found := false
115120
for _, nodeB := range new.Nodes {

pkg/controller/distributedrediscluster/sync_handler.go

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package distributedrediscluster
22

33
import (
4+
"fmt"
45
"net"
56
"time"
67

78
redisv1alpha1 "github.com/ucloud/redis-cluster-operator/pkg/apis/redis/v1alpha1"
89
"github.com/ucloud/redis-cluster-operator/pkg/controller/clustering"
10+
"github.com/ucloud/redis-cluster-operator/pkg/k8sutil"
911
"github.com/ucloud/redis-cluster-operator/pkg/redisutil"
1012
)
1113

@@ -15,18 +17,37 @@ const (
1517
)
1618

1719
func (r *ReconcileDistributedRedisCluster) waitPodReady(cluster *redisv1alpha1.DistributedRedisCluster) error {
18-
cluster.Validate()
20+
if err := r.validate(cluster); err != nil {
21+
if k8sutil.IsRequestRetryable(err) {
22+
return Kubernetes.Wrap(err, "Validate")
23+
}
24+
return StopRetry.Wrap(err, "stop retry")
25+
}
1926
// step 1. apply statefulSet for cluster
2027
labels := getLabels(cluster)
28+
var backup *redisv1alpha1.RedisClusterBackup
29+
var err error
30+
if cluster.Spec.Init != nil {
31+
backup, err = r.crController.GetRedisClusterBackup(cluster.Spec.Init.BackupSource.Namespace, cluster.Spec.Init.BackupSource.Name)
32+
if err != nil {
33+
return err
34+
}
35+
}
2136
if err := r.ensurer.EnsureRedisConfigMap(cluster, labels); err != nil {
2237
return Kubernetes.Wrap(err, "EnsureRedisConfigMap")
2338
}
24-
if err := r.ensurer.EnsureRedisStatefulset(cluster, labels); err != nil {
39+
if err := r.ensurer.EnsureRedisStatefulset(cluster, backup, labels); err != nil {
2540
return Kubernetes.Wrap(err, "EnsureRedisStatefulset")
2641
}
2742
if err := r.ensurer.EnsureRedisHeadLessSvc(cluster, labels); err != nil {
2843
return Kubernetes.Wrap(err, "EnsureRedisHeadLessSvc")
2944
}
45+
if err := r.ensurer.EnsureRedisOSMSecret(cluster, backup, labels); err != nil {
46+
if k8sutil.IsRequestRetryable(err) {
47+
return Kubernetes.Wrap(err, "EnsureRedisOSMSecret")
48+
}
49+
return StopRetry.Wrap(err, "stop retry")
50+
}
3051

3152
// step 2. wait for all redis node ready
3253
if err := r.checker.CheckRedisNodeNum(cluster); err != nil {
@@ -36,6 +57,33 @@ func (r *ReconcileDistributedRedisCluster) waitPodReady(cluster *redisv1alpha1.D
3657
return nil
3758
}
3859

60+
// validate checks the optional restore settings in cluster.Spec.Init and then
// applies the defaults of cluster.Validate(). It mutates cluster in place.
//
// When Init is set, the referenced backup is fetched and its recorded image
// (only if the spec has none), master size and replica count are copied into
// cluster.Spec.
//
// It returns an error when BackupSource is missing, when the backup cannot be
// fetched, or when the backup's phase is not Succeeded.
func (r *ReconcileDistributedRedisCluster) validate(cluster *redisv1alpha1.DistributedRedisCluster) error {
61+
initSpec := cluster.Spec.Init
62+
if initSpec != nil {
63+
if initSpec.BackupSource == nil {
64+
return fmt.Errorf("backupSource is required")
65+
}
66+
backup, err := r.crController.GetRedisClusterBackup(initSpec.BackupSource.Namespace, initSpec.BackupSource.Name)
67+
if err != nil {
68+
return err
69+
}
70+
// NOTE(review): every phase other than Succeeded is reported as "still
// running", including a failed backup. The caller turns errors that are not
// retryable into StopRetry, so a backup that really is still running may
// never be re-checked - confirm this is intended.
if backup.Status.Phase != redisv1alpha1.BackupPhaseSucceeded {
71+
return fmt.Errorf("backup is still running")
72+
}
73+
// An image given explicitly in the spec wins over the one recorded in the backup.
if cluster.Spec.Image == "" {
74+
cluster.Spec.Image = backup.Status.ClusterImage
75+
}
76+
// The master count always comes from the backup, overriding the spec.
cluster.Spec.MasterSize = backup.Status.MasterSize
77+
// Until the status records a successful restore, the cluster runs without
// replicas; afterwards the backup's replica count is used.
// presumably so that only masters load the restored data - TODO confirm
if cluster.Status.RestoreSucceeded <= 0 {
78+
cluster.Spec.ClusterReplicas = 0
79+
} else {
80+
cluster.Spec.ClusterReplicas = backup.Status.ClusterReplicas
81+
}
82+
}
83+
cluster.Validate()
84+
return nil
85+
}
85+
}
86+
3987
func (r *ReconcileDistributedRedisCluster) waitForClusterJoin(cluster *redisv1alpha1.DistributedRedisCluster, clusterInfos *redisutil.ClusterInfos, admin redisutil.IAdmin) error {
4088
logger := log.WithValues("namespace", cluster.Namespace, "name", cluster.Name)
4189
//logger.Info(">>> Assign a different config epoch to each node")
@@ -221,11 +269,11 @@ func (r *ReconcileDistributedRedisCluster) ensureCluster(cluster *redisv1alpha1.
221269
return Cluster.Wrap(err, "newRedisCluster")
222270
}
223271

224-
currentMasterNodes := nodes.FilterByFunc(redisutil.IsMasterWithSlot)
225-
if len(currentMasterNodes) == int(cluster.Spec.MasterSize) {
226-
logger.V(3).Info("cluster ok")
227-
return nil
228-
}
272+
//currentMasterNodes := nodes.FilterByFunc(redisutil.IsMasterWithSlot)
273+
//if len(currentMasterNodes) == int(cluster.Spec.MasterSize) {
274+
// logger.V(3).Info("cluster ok")
275+
// return nil
276+
//}
229277

230278
// First, we define the new masters
231279
newMasters, curMasters, allMaster, err := clustering.DispatchMasters(rCluster, nodes, cNbMaster)
@@ -280,6 +328,15 @@ func (r *ReconcileDistributedRedisCluster) ensureCluster(cluster *redisv1alpha1.
280328
if err := clustering.RebalancedCluster(admin, newMasters); err != nil {
281329
return Cluster.Wrap(err, "RebalancedCluster")
282330
}
331+
} else if len(newMasters) == len(curMasters) {
332+
newRedisSlavesByMaster, bestEffort := clustering.PlaceSlaves(rCluster, newMasters, currentSlaveNodes, newSlave, cReplicaFactor)
333+
if bestEffort {
334+
rCluster.NodesPlacement = redisv1alpha1.NodesPlacementInfoBestEffort
335+
}
336+
337+
if err := clustering.AttachingSlavesToMaster(rCluster, admin, newRedisSlavesByMaster); err != nil {
338+
return Cluster.Wrap(err, "AttachingSlavesToMaster")
339+
}
283340
}
284341

285342
return nil

0 commit comments

Comments
 (0)