Skip to content

Commit ebd0417

Browse files
committed
Modify startup scripts for ovn-controller-ovs
In order to minimize the downtime during an update of the ovn-controller-ovs pods, we're modifying the update strategy so that it doesn't accept any unavailable pod. This means that a new ovn-controller-ovs pod is created while the old one is still running. This commit makes it possible for two ovn-controller-ovs pods to coexist. It accomplishes this by modifying the start-up scripts of all containers so that they check whether a pod is already running and gradually stop the old pod in a controlled fashion while starting the new one. The logic relies on a temporary file, created by the init container, that tells the ovsdb-server/ovs-vswitchd containers whether they are in an update scenario or a normal start. The temporary file is deleted at the end of the ovs-vswitchd start-up, so when the ovnController CR is deleted the volumes won't have any leftovers. Related: OSPRH-11636 Jira: OSPRH-10821 Depends-on: lib-common#611
1 parent 602aa84 commit ebd0417

File tree

7 files changed

+129
-10
lines changed

7 files changed

+129
-10
lines changed

pkg/ovncontroller/daemonset.go

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
appsv1 "k8s.io/api/apps/v1"
2525
corev1 "k8s.io/api/core/v1"
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/apimachinery/pkg/util/intstr"
2728
"k8s.io/utils/ptr"
2829
)
2930

@@ -236,6 +237,27 @@ func CreateOVSDaemonSet(
236237
envVars := map[string]env.Setter{}
237238
envVars["CONFIG_HASH"] = env.SetValue(configHash)
238239

240+
volumes := []corev1.Volume{}
241+
mounts := []corev1.VolumeMount{}
242+
243+
// add OVN dbs cert and CA
244+
if instance.Spec.TLS.Enabled() {
245+
svc := tls.Service{
246+
SecretName: *instance.Spec.TLS.GenericService.SecretName,
247+
CertMount: ptr.To(ovn_common.OVNDbCertPath),
248+
KeyMount: ptr.To(ovn_common.OVNDbKeyPath),
249+
CaMount: ptr.To(ovn_common.OVNDbCaCertPath),
250+
}
251+
volumes = append(volumes, svc.CreateVolume(ovnv1.ServiceNameOVS))
252+
mounts = append(mounts, svc.CreateVolumeMounts(ovnv1.ServiceNameOVS)...)
253+
254+
// add CA bundle if defined
255+
if instance.Spec.TLS.CaBundleSecretName != "" {
256+
volumes = append(volumes, instance.Spec.TLS.CreateVolume())
257+
mounts = append(mounts, instance.Spec.TLS.CreateVolumeMounts(nil)...)
258+
}
259+
}
260+
239261
initContainers := []corev1.Container{
240262
{
241263
Name: "ovsdb-server-init",
@@ -250,7 +272,7 @@ func CreateOVSDaemonSet(
250272
Privileged: &privileged,
251273
},
252274
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
253-
VolumeMounts: GetOVSDbVolumeMounts(),
275+
VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...),
254276
},
255277
}
256278

@@ -276,7 +298,7 @@ func CreateOVSDaemonSet(
276298
Privileged: &privileged,
277299
},
278300
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
279-
VolumeMounts: GetOVSDbVolumeMounts(),
301+
VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...),
280302
// TODO: consider the fact that resources are now double booked
281303
Resources: instance.Spec.Resources,
282304
LivenessProbe: ovsDbLivenessProbe,
@@ -303,7 +325,7 @@ func CreateOVSDaemonSet(
303325
Privileged: &privileged,
304326
},
305327
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
306-
VolumeMounts: GetVswitchdVolumeMounts(),
328+
VolumeMounts: append(GetVswitchdVolumeMounts(), mounts...),
307329
// TODO: consider the fact that resources are now double booked
308330
Resources: instance.Spec.Resources,
309331
LivenessProbe: ovsVswitchdLivenessProbe,
@@ -312,6 +334,9 @@ func CreateOVSDaemonSet(
312334
},
313335
}
314336

337+
maxUnavailable := intstr.FromInt32(0)
338+
maxSurge := intstr.FromInt32(1)
339+
315340
daemonset := &appsv1.DaemonSet{
316341
ObjectMeta: metav1.ObjectMeta{
317342
Name: ovnv1.ServiceNameOVS,
@@ -327,9 +352,17 @@ func CreateOVSDaemonSet(
327352
},
328353
Spec: corev1.PodSpec{
329354
ServiceAccountName: instance.RbacResourceName(),
355+
HostPID: true,
330356
InitContainers: initContainers,
331357
Containers: containers,
332-
Volumes: GetOVSVolumes(instance.Name, instance.Namespace),
358+
Volumes: append(GetOVSVolumes(instance.Name, instance.Namespace), volumes...),
359+
},
360+
},
361+
UpdateStrategy: appsv1.DaemonSetUpdateStrategy{
362+
Type: appsv1.RollingUpdateDaemonSetStrategyType,
363+
RollingUpdate: &appsv1.RollingUpdateDaemonSet{
364+
MaxUnavailable: &maxUnavailable,
365+
MaxSurge: &maxSurge,
333366
},
334367
},
335368
},

templates/ovncontroller/bin/functions

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ FLOWS_RESTORE_SCRIPT=$ovs_dir/flows-script
2929
FLOWS_RESTORE_DIR=$ovs_dir/saved-flows
3030
SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE=$ovs_dir/is_safe_to_stop_ovsdb_server
3131

32+
# Variables declaration used by start-up optimization
33+
ovs_vswitchd_pid_file=/var/run/openvswitch/ovs-vswitchd.pid
34+
ovsdb_server_pid_file=/var/run/openvswitch/ovsdb-server.pid
35+
update_semaphore_file=/var/lib/openvswitch/update
36+
stop_vswitchd_script_file=/usr/local/bin/container-scripts/stop-vswitchd.sh
37+
stop_ovsdb_server_script_file=/usr/local/bin/container-scripts/stop-ovsdb-server.sh
38+
skip_ovsdb_server_stop_file=/var/lib/openvswitch/skip_stop_ovsdbserver
39+
skip_vswitchd_stop_file=/var/lib/openvswitch/skip_stop_vswitchd
40+
3241
function cleanup_ovsdb_server_semaphore() {
3342
rm -f $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE 2>&1 > /dev/null
3443
}

templates/ovncontroller/bin/init-ovsdb-server.sh

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,25 @@ trap wait_for_db_creation EXIT
2323
if ! [ -s ${DB_FILE} ]; then
2424
rm -f ${DB_FILE}
2525
fi
26-
# Initialize or upgrade database if needed
27-
CTL_ARGS="--system-id=random --no-ovs-vswitchd"
28-
/usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS
29-
/usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS
3026

31-
wait_for_db_creation
32-
trap - EXIT
27+
# Check if it's a normal start or an update
28+
# Normal start: ovsdb-server & ovs-vswitchd are not running, start normal
29+
# Update: ovsdb-server & ovs-vswitchd still running, need different approach
30+
if [ -f $ovs_vswitchd_pid_file ] || [ -f $ovsdb_server_pid_file ]; then
31+
# Some process is still running, so this is an update. Create the semaphore
32+
echo "UPDATE" > $update_semaphore_file
33+
# No need to initialize ovs-vswitchd in this path, as this has been done before
34+
# TODO: check what happens if during the update an update to the ovs db is needed
35+
else
36+
# In case something went wrong during the last run, ensure that the update semaphore file is not present in this path
37+
if [ -f $update_semaphore_file ]; then
38+
rm $update_semaphore_file
39+
fi
40+
# Initialize or upgrade database if needed
41+
CTL_ARGS="--system-id=random --no-ovs-vswitchd"
42+
/usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS
43+
/usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS
44+
45+
wait_for_db_creation
46+
trap - EXIT
47+
fi

templates/ovncontroller/bin/start-ovsdb-server.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,40 @@ source $(dirname $0)/functions
2020
# Remove the obsolete semaphore file in case it still exists.
2121
cleanup_ovsdb_server_semaphore
2222

23+
# Check if we're on the update path
24+
if [ -f $update_semaphore_file ]; then
25+
echo "In the middle of an upgrade"
26+
# Need to stop vswitchd
27+
echo "Stopping vswitchd"
28+
bash $stop_vswitchd_script_file
29+
# with this script the current lflows should be already stored in a file
30+
# and vswitchd should be stopped.
31+
# Need to wait until vswitchd is stopped in order to also stop the ovsdb-server
32+
while true; do
33+
if [ ! -f $ovs_vswitchd_pid_file ]; then
34+
break
35+
fi
36+
sleep 0.1
37+
done
38+
# ovs-vswitchd was already restarted, need to skip the preStop from the openshift
39+
# lifecycle when the old pod gets deleted
40+
echo "Creating flag file to skip ovs-vswitchd stop"
41+
touch $skip_vswitchd_stop_file
42+
# Run the stop-ovsdb-server script to ensure the lflows semaphore is cleaned up correctly
43+
bash $stop_ovsdb_server_script_file
44+
# Need to create a flag-file to skip ovsdb-server stop
45+
# to avoid triggering it again when openshift triggers the preStop script.
46+
echo "Creating flag file to skip ovsdb-server stop"
47+
touch $skip_ovsdb_server_stop_file
48+
# Ensure that ovsdb-server is stopped
49+
while true; do
50+
if [ ! -f $ovsdb_server_pid_file ]; then
51+
break
52+
fi
53+
sleep 0.1
54+
done
55+
fi
56+
2357
# Start the service
2458
ovsdb-server ${DB_FILE} \
2559
--pidfile \

templates/ovncontroller/bin/start-vswitchd.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@
1515
# under the License.
1616

1717
source $(dirname $0)/functions
18+
19+
# If we're on an update wait until past vswitchd process is stopped correctly
20+
if [ -f $update_semaphore_file ]; then
21+
# In the middle of an update, wait until vswitchd is already stopped
22+
while true; do
23+
if [ ! -f $ovs_vswitchd_pid_file ]; then
24+
break
25+
fi
26+
sleep 0.1
27+
done
28+
fi
29+
1830
wait_for_ovsdb_server
1931

2032
# The order - first wait for db server, then set -ex - is important. Otherwise,
@@ -49,6 +61,10 @@ cleanup_flows_backup
4961
# Now, inform vswitchd that we are done.
5062
ovs-vsctl remove open_vswitch . other_config flow-restore-wait
5163

64+
# At this point, ovsdb-server and vswitchd are already running, update (if it was the case)
65+
# is already done. Delete update file
66+
rm $update_semaphore_file || true
67+
5268
# This is container command script. Block it from exiting, otherwise k8s will
5369
# restart the container again.
5470
sleep infinity

templates/ovncontroller/bin/stop-ovsdb-server.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
set -ex
1818
source $(dirname $0)/functions
1919

20+
# If file is present, skip stop script
21+
if [ -f $skip_ovsdb_server_stop_file ]; then
22+
rm $skip_ovsdb_server_stop_file
23+
exit 0
24+
fi
25+
2026
# The ovs_vswitchd container has to terminate before ovsdb-server because it
2127
# needs access to db in its preStop script. The preStop script backs up flows
2228
# for restoration during the next startup. This semaphore ensures the vswitchd

templates/ovncontroller/bin/stop-vswitchd.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
set -ex
1818
source $(dirname $0)/functions
1919

20+
# If file is present, skip stop script
21+
if [ -f $skip_vswitchd_stop_file ]; then
22+
rm $skip_vswitchd_stop_file
23+
exit 0
24+
fi
25+
2026
# Clean up any previously created flow backups to avoid conflict with newly
2127
# generated backup.
2228
cleanup_flows_backup

0 commit comments

Comments
 (0)