|
| 1 | +// Package checkpoint provides libraries that are used by the pod-checkpointer utility to checkpoint |
| 2 | +// pods on a node. See cmd/checkpoint/README.md in this repository for more information. |
| 3 | +package checkpoint |
| 4 | + |
| 5 | +import ( |
| 6 | + "fmt" |
| 7 | + "time" |
| 8 | + |
| 9 | + "k8s.io/client-go/kubernetes" |
| 10 | + restclient "k8s.io/client-go/rest" |
| 11 | +) |
| 12 | + |
| 13 | +const ( |
| 14 | + activeCheckpointPath = "/etc/kubernetes/manifests" |
| 15 | + inactiveCheckpointPath = "/etc/kubernetes/inactive-manifests" |
| 16 | + checkpointSecretPath = "/etc/kubernetes/checkpoint-secrets" |
| 17 | + checkpointConfigMapPath = "/etc/kubernetes/checkpoint-configmaps" |
| 18 | + |
| 19 | + shouldCheckpointAnnotation = "checkpointer.alpha.coreos.com/checkpoint" // = "true" |
| 20 | + checkpointParentAnnotation = "checkpointer.alpha.coreos.com/checkpoint-of" // = "podName" |
| 21 | + podSourceAnnotation = "kubernetes.io/config.source" |
| 22 | + |
| 23 | + shouldCheckpoint = "true" |
| 24 | + podSourceFile = "file" |
| 25 | + |
| 26 | + defaultPollingFrequency = 3 * time.Second |
| 27 | + defaultCheckpointTimeout = 1 * time.Minute |
| 28 | +) |
| 29 | + |
// lastCheckpoint starts out as the zero time.Time. It is not written in this part of
// the file; presumably it records when a checkpoint was last taken -- confirm against
// its users elsewhere in the package.
var lastCheckpoint time.Time
| 33 | + |
| 34 | +// Options defines the parameters that are required to start the checkpointer. |
| 35 | +type Options struct { |
| 36 | + // CheckpointerPod holds information about this checkpointer pod. |
| 37 | + CheckpointerPod CheckpointerPod |
| 38 | + // KubeConfig is a valid kubeconfig for communicating with the APIServer. |
| 39 | + KubeConfig *restclient.Config |
| 40 | + // RemoteRuntimeEndpoint is the location of the CRI GRPC endpoint. |
| 41 | + RemoteRuntimeEndpoint string |
| 42 | + // RuntimeRequestTimeout is the timeout that is used for requests to the RemoteRuntimeEndpoint. |
| 43 | + RuntimeRequestTimeout time.Duration |
| 44 | +} |
| 45 | + |
// CheckpointerPod describes the pod in which this checkpointer is running.
type CheckpointerPod struct {
	// NodeName is the node on which this checkpointer runs.
	NodeName string

	// PodName is the name of the pod running this checkpointer.
	PodName string

	// PodNamespace is the namespace of the pod running this checkpointer.
	PodNamespace string
}
| 55 | + |
| 56 | +// checkpointer holds state used by the checkpointer to perform its duties. |
| 57 | +type checkpointer struct { |
| 58 | + apiserver kubernetes.Interface |
| 59 | + kubelet *kubeletClient |
| 60 | + cri *remoteRuntimeService |
| 61 | + checkpointerPod CheckpointerPod |
| 62 | +} |
| 63 | + |
| 64 | +// Run instantiates and starts a new checkpointer. Returns error if there was a problem creating |
| 65 | +// the checkpointer, otherwise never returns. |
| 66 | +func Run(opts Options) error { |
| 67 | + apiserver := kubernetes.NewForConfigOrDie(opts.KubeConfig) |
| 68 | + |
| 69 | + kubelet, err := newKubeletClient(opts.KubeConfig) |
| 70 | + if err != nil { |
| 71 | + return fmt.Errorf("failed to load kubelet client: %v", err) |
| 72 | + } |
| 73 | + |
| 74 | + // Open a GRPC connection to the CRI shim |
| 75 | + cri, err := newRemoteRuntimeService(opts.RemoteRuntimeEndpoint, opts.RuntimeRequestTimeout) |
| 76 | + if err != nil { |
| 77 | + return fmt.Errorf("failed to connect to CRI server: %v", err) |
| 78 | + } |
| 79 | + |
| 80 | + cp := &checkpointer{ |
| 81 | + apiserver: apiserver, |
| 82 | + kubelet: kubelet, |
| 83 | + cri: cri, |
| 84 | + checkpointerPod: opts.CheckpointerPod, |
| 85 | + } |
| 86 | + cp.run() |
| 87 | + |
| 88 | + return nil |
| 89 | +} |
| 90 | + |
| 91 | +// run is the main checkpointing loop. |
| 92 | +func (c *checkpointer) run() { |
| 93 | + for { |
| 94 | + time.Sleep(defaultPollingFrequency) |
| 95 | + |
| 96 | + // We must use both the :10255/pods endpoint and CRI shim, because /pods |
| 97 | + // endpoint could have stale data. The /pods endpoint will only show the last cached |
| 98 | + // status which has successfully been written to an apiserver. However, if there is |
| 99 | + // no apiserver, we may get stale state (e.g. saying pod is running, when it really is |
| 100 | + // not). |
| 101 | + localParentPods := c.kubelet.localParentPods() |
| 102 | + localRunningPods := c.cri.localRunningPods() |
| 103 | + |
| 104 | + c.createCheckpointsForValidParents(localParentPods) |
| 105 | + |
| 106 | + // Try to get scheduled pods from the apiserver. |
| 107 | + // These will be used to GC checkpoints for parents no longer scheduled to this node. |
| 108 | + // A return value of nil is assumed to be "could not contact apiserver" |
| 109 | + // TODO(aaron): only check this every 30 seconds or so |
| 110 | + apiParentPods := c.getAPIParentPods(c.checkpointerPod.NodeName) |
| 111 | + |
| 112 | + // Get on disk copies of (in)active checkpoints |
| 113 | + //TODO(aaron): Could be racy to load from disk each time, but much easier than trying to keep in-memory state in sync. |
| 114 | + activeCheckpoints := getFileCheckpoints(activeCheckpointPath) |
| 115 | + inactiveCheckpoints := getFileCheckpoints(inactiveCheckpointPath) |
| 116 | + |
| 117 | + start, stop, remove := process(localRunningPods, localParentPods, apiParentPods, activeCheckpoints, inactiveCheckpoints, c.checkpointerPod) |
| 118 | + |
| 119 | + // Handle remove at last because we may still have some work to do |
| 120 | + // before removing the checkpointer itself. |
| 121 | + handleStop(stop) |
| 122 | + handleStart(start) |
| 123 | + handleRemove(remove) |
| 124 | + } |
| 125 | +} |
0 commit comments