Skip to content

Commit d0a21be

Browse files
improve docker service reliability, update docker systemd files (#21174)
* docker: update template for docker.service * wait for docker * fix unit test
1 parent c432003 commit d0a21be

File tree

4 files changed

+57
-39
lines changed

4 files changed

+57
-39
lines changed

pkg/minikube/cruntime/cruntime_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ func (f *FakeRunner) systemctl(args []string, root bool) (string, error) { // no
554554
f.t.Logf("fake systemctl: SvcRestarted %s", svc)
555555
case "is-active":
556556
f.t.Logf("fake systemctl: %s is-status: %v", svc, state)
557-
if state == SvcRunning {
557+
if state == SvcRunning || state == SvcRestarted {
558558
return out, nil
559559
}
560560
return out, fmt.Errorf("%s in state: %v", svc, state)

pkg/minikube/cruntime/docker.go

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,7 @@ func (r *Docker) Enable(disOthers bool, cgroupDriver string, inUserNamespace boo
168168
return err
169169
}
170170

171-
_ = r.Init.ResetFailed("docker")
172-
if err := r.Init.Restart("docker"); err != nil {
171+
if err := r.restartServiceWithExpoRetry("docker"); err != nil {
173172
return err
174173
}
175174

@@ -199,20 +198,9 @@ func (r *Docker) Enable(disOthers bool, cgroupDriver string, inUserNamespace boo
199198
return err
200199
}
201200

202-
_ = r.Init.ResetFailed(service)
203-
_ = r.Init.Restart(service)
204-
// try to restart service if stopped, restart until it works
205-
retry.Expo(
206-
func() error {
207-
if !r.Init.Active(service) {
208-
r.Init.ResetFailed(service)
209-
r.Init.Restart(service)
210-
return fmt.Errorf("cri-docker not running")
211-
}
212-
return nil
213-
},
214-
1*time.Second, time.Minute, 5,
215-
)
201+
if err := r.restartServiceWithExpoRetry(service); err != nil {
202+
return err
203+
}
216204
}
217205

218206
return nil
@@ -824,3 +812,20 @@ ExecStart={{.ExecPath}} --container-runtime-endpoint fd:// --pod-infra-container
824812
}
825813
return nil
826814
}
815+
816+
func (r *Docker) restartServiceWithExpoRetry(service string) error {
817+
_ = r.Init.ResetFailed(service)
818+
_ = r.Init.Restart(service)
819+
// try to restart service if stopped, restart until it works
820+
return retry.Expo(
821+
func() error {
822+
if !r.Init.Active(service) {
823+
r.Init.ResetFailed(service)
824+
r.Init.Restart(service)
825+
return fmt.Errorf("%s not running", service)
826+
}
827+
return nil
828+
},
829+
1*time.Second, time.Minute, 5,
830+
)
831+
}

pkg/provision/buildroot.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,21 @@ func (p *BuildrootProvisioner) GenerateDockerOptions(dockerPort int) (*provision
7070
klog.Infof("root file system type: %s", fstype)
7171
noPivot = fstype == "rootfs"
7272
}
73-
73+
/* Template is copied from https://github.com/moby/moby/blob/44bca1adf3c806c4ed6688d67c6f995653b09b26/contrib/init/systemd/docker.service#L2 with the following changes:
74+
After: minikube-automount.service is added
75+
ExecStart: additional flags are added
76+
*/
7477
engineConfigTmpl := `[Unit]
7578
Description=Docker Application Container Engine
7679
Documentation=https://docs.docker.com
77-
After=network.target minikube-automount.service docker.socket
78-
Requires= minikube-automount.service docker.socket
80+
After=network-online.target minikube-automount.service nss-lookup.target docker.socket firewalld.service containerd.service time-set.target
81+
Wants=network-online.target containerd.service
82+
Requires=docker.socket
7983
StartLimitBurst=3
8084
StartLimitIntervalSec=60
81-
8285
[Service]
8386
Type=notify
84-
Restart=on-failure
87+
Restart=always
8588
`
8689
if noPivot {
8790
klog.Warning("Using fundamentally insecure --no-pivot option")
@@ -94,18 +97,15 @@ Environment=DOCKER_RAMDISK=yes
9497
{{range .EngineOptions.Env}}Environment={{.}}
9598
{{end}}
9699
97-
# This file is a systemd drop-in unit that inherits from the base dockerd configuration.
98-
# The base configuration already specifies an 'ExecStart=...' command. The first directive
99-
# here is to clear out that command inherited from the base configuration. Without this,
100-
# the command from the base configuration and the command specified here are treated as
101-
# a sequence of commands, which is not the desired behavior, nor is it valid -- systemd
102-
# will catch this invalid input and refuse to start the service with an error like:
103-
# Service has more than one ExecStart= setting, which is only allowed for Type=oneshot services.
104-
105-
# NOTE: default-ulimit=nofile is set to an arbitrary number for consistency with other
106-
# container runtimes. If left unlimited, it may result in OOM issues with MySQL.
107100
ExecStart=
108-
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock --default-ulimit=nofile=1048576:1048576 --tlsverify --tlscacert {{.AuthOptions.CaCertRemotePath}} --tlscert {{.AuthOptions.ServerCertRemotePath}} --tlskey {{.AuthOptions.ServerKeyRemotePath}} {{ range .EngineOptions.Labels }}--label {{.}} {{ end }}{{ range .EngineOptions.InsecureRegistry }}--insecure-registry {{.}} {{ end }}{{ range .EngineOptions.RegistryMirror }}--registry-mirror {{.}} {{ end }}{{ range .EngineOptions.ArbitraryFlags }}--{{.}} {{ end }}
101+
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:2376 \
102+
-H fd:// --containerd=/run/containerd/containerd.sock \
103+
-H unix:///var/run/docker.sock \
104+
--default-ulimit=nofile=1048576:1048576 \
105+
--tlsverify \
106+
--tlscacert {{.AuthOptions.CaCertRemotePath}} \
107+
--tlscert {{.AuthOptions.ServerCertRemotePath}} \
108+
--tlskey {{.AuthOptions.ServerKeyRemotePath}} {{ range .EngineOptions.Labels }}--label {{.}} {{ end }}{{ range .EngineOptions.InsecureRegistry }}--insecure-registry {{.}} {{ end }}{{ range .EngineOptions.RegistryMirror }}--registry-mirror {{.}} {{ end }}{{ range .EngineOptions.ArbitraryFlags }}--{{.}} {{ end }}
109109
ExecReload=/bin/kill -s HUP \$MAINPID
110110
111111
# Having non-zero Limit*s causes performance problems due to accounting overhead
@@ -117,13 +117,13 @@ LimitCORE=infinity
117117
# Uncomment TasksMax if your systemd version supports it.
118118
# Only systemd 226 and above support this version.
119119
TasksMax=infinity
120-
TimeoutStartSec=0
121120
122121
# set delegate yes so that systemd does not reset the cgroups of docker containers
123122
Delegate=yes
124123
125124
# kill only the docker process, not all processes in the cgroup
126125
KillMode=process
126+
OOMScoreAdjust=-500
127127
128128
[Install]
129129
WantedBy=multi-user.target

pkg/provision/ubuntu.go

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,25 @@ func (p *UbuntuProvisioner) GenerateDockerOptions(dockerPort int) (*provision.Do
7171
klog.Infof("root file system type: %s", fstype)
7272
noPivot = fstype == "rootfs"
7373
}
74+
/* Template is copied from https://github.com/moby/moby/blob/44bca1adf3c806c4ed6688d67c6f995653b09b26/contrib/init/systemd/docker.service#L2 with the following changes:
75+
ExecStart: additional flags are added
7476
77+
StartLimitBurst=3 : The service can be restarted up to 3 times within the time window defined by StartLimitIntervalSec.
78+
79+
StartLimitIntervalSec=60 : The time window is 60 seconds. If the service fails and restarts more than 3 times in 60 seconds, systemd will stop trying to restart it automatically.
80+
*/
7581
engineConfigTmpl := `[Unit]
7682
Description=Docker Application Container Engine
7783
Documentation=https://docs.docker.com
78-
BindsTo=containerd.service
79-
After=network-online.target firewalld.service containerd.service
80-
Wants=network-online.target
84+
After=network-online.target nss-lookup.target docker.socket firewalld.service containerd.service time-set.target
85+
Wants=network-online.target containerd.service
8186
Requires=docker.socket
8287
StartLimitBurst=3
8388
StartLimitIntervalSec=60
8489
8590
[Service]
8691
Type=notify
87-
Restart=on-failure
92+
Restart=always
8893
`
8994
if noPivot {
9095
klog.Warning("Using fundamentally insecure --no-pivot option")
@@ -108,7 +113,14 @@ Environment=DOCKER_RAMDISK=yes
108113
# NOTE: default-ulimit=nofile is set to an arbitrary number for consistency with other
109114
# container runtimes. If left unlimited, it may result in OOM issues with MySQL.
110115
ExecStart=
111-
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock --default-ulimit=nofile=1048576:1048576 --tlsverify --tlscacert {{.AuthOptions.CaCertRemotePath}} --tlscert {{.AuthOptions.ServerCertRemotePath}} --tlskey {{.AuthOptions.ServerKeyRemotePath}} {{ range .EngineOptions.Labels }}--label {{.}} {{ end }}{{ range .EngineOptions.InsecureRegistry }}--insecure-registry {{.}} {{ end }}{{ range .EngineOptions.RegistryMirror }}--registry-mirror {{.}} {{ end }}{{ range .EngineOptions.ArbitraryFlags }}--{{.}} {{ end }}
116+
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:2376 \
117+
-H fd:// --containerd=/run/containerd/containerd.sock \
118+
-H unix:///var/run/docker.sock \
119+
--default-ulimit=nofile=1048576:1048576 \
120+
--tlsverify \
121+
--tlscacert {{.AuthOptions.CaCertRemotePath}} \
122+
--tlscert {{.AuthOptions.ServerCertRemotePath}} \
123+
--tlskey {{.AuthOptions.ServerKeyRemotePath}} {{ range .EngineOptions.Labels }}--label {{.}} {{ end }}{{ range .EngineOptions.InsecureRegistry }}--insecure-registry {{.}} {{ end }}{{ range .EngineOptions.RegistryMirror }}--registry-mirror {{.}} {{ end }}{{ range .EngineOptions.ArbitraryFlags }}--{{.}} {{ end }}
112124
ExecReload=/bin/kill -s HUP \$MAINPID
113125
114126
# Having non-zero Limit*s causes performance problems due to accounting overhead
@@ -127,6 +139,7 @@ Delegate=yes
127139
128140
# kill only the docker process, not all processes in the cgroup
129141
KillMode=process
142+
OOMScoreAdjust=-500
130143
131144
[Install]
132145
WantedBy=multi-user.target

0 commit comments

Comments
 (0)