|
95 | 95 |
|
96 | 96 | services = |
97 | 97 | let |
98 | | - mainServices = lib.mapAttrs ( |
| 98 | + mainServices = lib.concatMapAttrs ( |
99 | 99 | mainServiceName: serviceConfig: |
100 | 100 | let |
101 | 101 | cfg = serviceConfig.healthcheck; |
|
104 | 104 | let |
105 | 105 | probeCfg = cfg.readiness-probe; |
106 | 106 | in |
107 | | - { |
108 | | - # We have to force it to be a notify service, in order to use systemd-notify. |
109 | | - serviceConfig.Type = lib.mkForce "notify"; |
110 | | - # If the TimeoutStartSec is not infinity, it can cause the service to fail, because the readiness probe is considered part of the startup. |
111 | | - serviceConfig.TimeoutStartSec = lib.mkForce "infinity"; |
112 | | - |
113 | | - # We add a ExecStartPost with a script that runs the readiness probe |
114 | | - serviceConfig.ExecStartPre = |
115 | | - let |
116 | | - scriptPath = lib.makeBinPath ( |
117 | | - [ |
118 | | - pkgs.systemd |
119 | | - pkgs.curl |
120 | | - pkgs.gawk |
121 | | - ] |
122 | | - ++ (cfg.runtimePackages or [ ]) |
123 | | - ++ (serviceConfig.path or [ ]) |
124 | | - ); |
125 | | - in |
126 | | - pkgs.writeShellScript "${mainServiceName}-readiness-check" '' |
127 | | - #!${pkgs.runtimeShell} |
128 | | - set -o nounset |
129 | | -
|
130 | | - export NOTIFY_SOCKET |
131 | | - monitor() { |
132 | | - export PATH="${scriptPath}:$PATH" |
133 | | -
|
134 | | - echo "Health check: starting background readiness probe for ${mainServiceName}." 1>>/tmp/banica1 2>>/tmp/banica2 |
135 | | - sleep ${toString probeCfg.initialDelay} |
136 | | - retryCount=${toString probeCfg.retryCount} |
137 | | - while true; do |
138 | | - if (timeout ${toString probeCfg.timeout}s ${probeCfg.command} &> /dev/null); then |
139 | | - echo "Health check: probe successful. Notifying systemd that the service is ready." 1>>/tmp/banica1 2>>/tmp/banica2 |
140 | | - systemd-notify --ready --status="${probeCfg.statusReadyMessage}" 1>>/tmp/banica1 2>>/tmp/banica2 |
141 | | - exit 0 |
142 | | - else |
143 | | - echo "Health check: probe not successful. Notifying systemd that the service is still waiting. Retrying in ${toString probeCfg.interval} seconds..." 1>>/tmp/banica1 2>>/tmp/banica2 |
144 | | - systemd-notify --status="${probeCfg.statusWaitingMessage}" 1>>/tmp/banica1 2>>/tmp/banica2 |
145 | | - if [[ ''${retryCount} -ne -1 ]]; then |
146 | | - retryCount=$((retryCount - 1)) |
147 | | - if [[ ''${retryCount} -le 0 ]]; then |
148 | | - echo "Health check: probe failed after maximum retries. Exiting." 1>>/tmp/banica1 2>>/tmp/banica2 |
149 | | - exit 1 |
| 107 | + { |
| 108 | + "${mainServiceName}-liveness-check" = { |
| 109 | + # The check runs as a oneshot unit: its script exits 0 once the probe succeeds and 1 once the retries are exhausted. |
| 110 | + serviceConfig.Type = "oneshot"; |
| 111 | + # If the TimeoutStartSec is not infinity, it can cause the service to fail, because the readiness probe is considered part of the startup. |
| 112 | + serviceConfig.TimeoutStartSec = "infinity"; |
| 113 | + |
| 114 | + # The unit's script runs the readiness probe in a retry loop |
| 115 | + script = |
| 116 | + let |
| 117 | + scriptPath = lib.makeBinPath ( |
| 118 | + [ |
| 119 | + pkgs.systemd |
| 120 | + pkgs.curl |
| 121 | + pkgs.gawk |
| 122 | + ] |
| 123 | + ++ (cfg.runtimePackages or [ ]) |
| 124 | + ++ (serviceConfig.path or [ ]) |
| 125 | + ); |
| 126 | + in |
| 127 | + pkgs.writeShellScript "${mainServiceName}-readiness-check" '' |
| 128 | + #!${pkgs.runtimeShell} |
| 129 | + set -o nounset |
| 130 | +
|
| 131 | + export PATH="${scriptPath}:$PATH" |
| 132 | +
|
| 133 | + echo "Health check: starting background readiness probe for ${mainServiceName}." |
| 134 | + sleep ${toString probeCfg.initialDelay} |
| 135 | + retryCount=${toString probeCfg.retryCount} |
| 136 | + while true; do |
| 137 | + if (timeout ${toString probeCfg.timeout}s ${probeCfg.command} &> /dev/null); then |
| 138 | + echo "Health check: probe successful. Notifying systemd that the service is ready." |
| 139 | + exit 0 |
| 140 | + else |
| 141 | + echo "Health check: probe not successful. Notifying systemd that the service is still waiting. Retrying in ${toString probeCfg.interval} seconds..." |
| 142 | + if [[ ''${retryCount} -ne -1 ]]; then |
| 143 | + retryCount=$((retryCount - 1)) |
| 144 | + if [[ ''${retryCount} -le 0 ]]; then |
| 145 | + echo "Health check: probe failed after maximum retries. Exiting." |
| 146 | + exit 1 |
| 147 | + fi |
150 | 148 | fi |
151 | 149 | fi |
152 | | - fi |
153 | | - sleep ${toString probeCfg.interval} |
154 | | - done |
155 | | - } |
| 150 | + sleep ${toString probeCfg.interval} |
| 151 | + done |
| 152 | + ''; |
156 | 153 |
|
157 | | - monitor & |
158 | | - ''; |
159 | | - } |
| 154 | + requires = [ "${mainServiceName}.service" ]; |
| 155 | + after = [ "${mainServiceName}.service" ]; |
| 156 | + }; |
| 157 | + } // lib.pipe config.systemd.services [ |
| 158 | + # TODO: not only `requires`, also `after` and friends |
| 159 | + (lib.filterAttrs (name: value: lib.elem "${mainServiceName}.service" value.requires)) |
| 160 | + (lib.mapAttrs (value: lib.recursiveUpdate value { |
| 161 | + requires = value.requires ++ [ |
| 162 | + "${mainServiceName}-readiness-check.service" |
| 163 | + ]; |
| 164 | + })) |
| 165 | + ]; |
160 | 166 | )) |
161 | 167 | ) servicesWithHealthcheck; |
162 | 168 | healthCheckServices = lib.mapAttrs' ( |
|
0 commit comments