This repository was archived by the owner on Jul 30, 2021. It is now read-only.
File tree Expand file tree Collapse file tree 7 files changed +53
-22
lines changed
Expand file tree Collapse file tree 7 files changed +53
-22
lines changed Original file line number Diff line number Diff line change @@ -79,3 +79,8 @@ ConfigMaps are stored using a path of:
7979```
8080/etc/kubernetes/checkpoint-configmaps/<namespace>/<pod-name>/<configmap-name>
8181```
82+ ### Self Checkpointing
83+
84+ The pod checkpointer will also checkpoint itself to disk to handle the absence of the API server.
85+ After a node reboot, the on-disk pod-checkpointer will take over the responsibility.
86+ Once it reaches the API server and finds out that it is no longer scheduled, it will clean itself up.
Original file line number Diff line number Diff line change @@ -608,6 +608,13 @@ func handleRemove(remove []string) {
608608 // Remove active checkpoints.
609609 // We do this as the last step because we want to clean up
610610 // resources before the checkpointer itself exits.
611+ //
612+ // TODO(yifan): Removing the pods after removing the secrets/configmaps
613+ // might disturb other pods since they might want to use the configmap
614+ // or secrets during termination.
615+ //
616+ // However, since we are not waiting for them to terminate anyway, it's
617+ // ok to just leave this as is for now. We can handle this more gracefully later.
611618 p = PodFullNameToActiveCheckpointPath (id )
612619 if err := os .Remove (p ); err != nil && ! os .IsNotExist (err ) {
613620 glog .Errorf ("Failed to remove active checkpoint %s: %v" , p , err )
Original file line number Diff line number Diff line change 11FROM alpine
22
33COPY checkpoint /checkpoint
4- COPY checkpoint-install.sh /checkpoint-installer.sh
5- COPY checkpoint-pod.yaml /checkpoint-pod.yaml
Original file line number Diff line number Diff line change @@ -12,9 +12,6 @@ function image::build() {
1212 # Add assets for container build
1313 cp ${BOOTKUBE_ROOT} /_output/bin/linux/checkpoint ${TEMP_DIR}
1414 cp ${BOOTKUBE_ROOT} /image/checkpoint/Dockerfile ${TEMP_DIR}
15- cp ${BOOTKUBE_ROOT} /image/checkpoint/checkpoint-install.sh ${TEMP_DIR}
16- cp ${BOOTKUBE_ROOT} /image/checkpoint/checkpoint-pod.yaml ${TEMP_DIR}
17- sed -i " s#{{ REPO }}:{{ TAG }}#${IMAGE_REPO} :${VERSION} #" ${TEMP_DIR} /checkpoint-pod.yaml
1815
1916 docker build -t ${IMAGE_REPO} :${VERSION} -f ${TEMP_DIR} /Dockerfile ${TEMP_DIR}
2017 rm -rf ${TEMP_DIR}
Load Diff This file was deleted.
Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ const (
3838 AssetPathKubeDNSDeployment = "manifests/kube-dns-deployment.yaml"
3939 AssetPathKubeDNSSvc = "manifests/kube-dns-svc.yaml"
4040 AssetPathSystemNamespace = "manifests/kube-system-ns.yaml"
41- AssetPathCheckpointer = "manifests/pod-checkpoint-installer .yaml"
41+ AssetPathCheckpointer = "manifests/pod-checkpointer .yaml"
4242 AssetPathEtcdOperator = "manifests/etcd-operator.yaml"
4343 AssetPathEtcdSvc = "manifests/etcd-service.yaml"
4444 AssetPathKenc = "manifests/kenc.yaml"
Original file line number Diff line number Diff line change @@ -247,31 +247,61 @@ spec:
247247 CheckpointerTemplate = []byte (`apiVersion: "extensions/v1beta1"
248248kind: DaemonSet
249249metadata:
250- name: checkpoint-installer
250+ name: pod-checkpointer
251251 namespace: kube-system
252252 labels:
253- k8s-app: pod-checkpoint-installer
253+ k8s-app: pod-checkpointer
254254spec:
255255 template:
256256 metadata:
257257 labels:
258- k8s-app: pod-checkpoint-installer
258+ k8s-app: pod-checkpointer
259+ annotations:
260+ checkpointer.alpha.coreos.com/checkpoint: "true"
259261 spec:
260262 nodeSelector:
261263 master: "true"
262264 hostNetwork: true
263265 containers:
264- - name: checkpoint-installer
265- image: quay.io/coreos/pod-checkpointer:417b8f7552ccf3db192ba1e5472e524848f0eb5f
266+ - name: checkpoint
267+ image: quay.io/coreos/pod-checkpointer:f0631b5e25a21db9c68cff6c5e719c72c0181c4f
266268 command:
267- - /checkpoint-installer.sh
269+ - /checkpoint
270+ - --v=4
271+ - --lock-file=/var/run/lock/pod-checkpointer.lock
272+ env:
273+ - name: NODE_NAME
274+ valueFrom:
275+ fieldRef:
276+ fieldPath: spec.nodeName
277+ - name: POD_NAME
278+ valueFrom:
279+ fieldRef:
280+ fieldPath: metadata.name
281+ - name: POD_NAMESPACE
282+ valueFrom:
283+ fieldRef:
284+ fieldPath: metadata.namespace
285+ imagePullPolicy: Always
268286 volumeMounts:
269- - mountPath: /etc/kubernetes/manifests
270- name: etc-k8s-manifests
287+ - mountPath: /etc/kubernetes
288+ name: etc-kubernetes
289+ - mountPath: /srv/kubernetes
290+ name: srv-kubernetes
291+ - mountPath: /var/run
292+ name: var-run
293+ hostNetwork: true
294+ restartPolicy: Always
271295 volumes:
272- - name: etc-k8s-manifests
296+ - name: etc-kubernetes
297+ hostPath:
298+ path: /etc/kubernetes
299+ - name: srv-kubernetes
300+ hostPath:
301+ path: /srv/kubernetes
302+ - name: var-run
273303 hostPath:
274- path: /etc/kubernetes/manifests
304+ path: /var/run
275305` )
276306 ControllerManagerTemplate = []byte (`apiVersion: extensions/v1beta1
277307kind: Deployment
You can’t perform that action at this time.
0 commit comments