Skip to content

Commit 6a13923

Browse files
committed
chore: simplify
1 parent 1809855 commit 6a13923

File tree

3 files changed

+42
-7
lines changed

3 files changed

+42
-7
lines changed

docker-compose.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,11 @@ services:
6464
start_period: 15s
6565
retries: 3
6666

67+
# runner-net 设为 external,避免 compose down 时删除网络导致已注册的 Runner 容器
68+
# (由 Manager 动态创建、未在 compose 中定义)无法启动。首次使用请执行:docker network create runner-net
6769
networks:
6870
runner-net:
69-
driver: bridge
71+
external: true
7072

7173
volumes:
7274
dind-storage:

docs/docker.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@ cp config.yaml.example config.yaml
2626
chown 1001:1001 config.yaml
2727
mkdir -p runners && chown 1001:1001 runners
2828

29-
# 3. 启动(仅 Manager,适用于 job_docker_backend: host-socket 或 none)
29+
# 3. 创建网络(仅首次;compose 中 runner-net 为 external,避免 down 时删网导致已注册 Runner 容器无法启动)
30+
docker network create runner-net 2>/dev/null || true
31+
32+
# 4. 启动(仅 Manager,适用于 job_docker_backend: host-socket 或 none)
3033
docker compose up -d
3134

3235
# 若 config.yaml 中 job_docker_backend: dind,需同时启动 DinD:
@@ -192,6 +195,14 @@ docker run -d --name runner-manager \
192195

193196
## 排障与迁移
194197

198+
### docker compose down 后 Runner 容器无法启动(状态为 Created)
199+
200+
原因:`docker compose down` 会删除 compose 创建的网络 `runner-net`,而由 Manager 动态创建的 Runner 容器不在 compose 中,不会被删除,仍引用已删除的网络,导致无法启动。
201+
202+
**预防**:仓库内 `docker-compose.yml` 已将 `runner-net` 设为 **external**,`compose down` 不会删除该网络。首次使用前执行一次:`docker network create runner-net`。
203+
204+
**已出现问题时**:在 Web 界面点击该 Runner 的「启动」即可——Manager 会检测到 `docker start` 因网络失效而失败,自动删除旧容器并用当前配置重新创建并启动,无需手动 `docker rm`。若仍失败,可手动删除后再点「启动」:`docker rm -f github-runner-<名称>`。无需删除 `runners/` 下对应目录或重新注册。
205+
195206
### root / 非 root、RUNNER_ALLOW_RUNASROOT
196207

197208
- **推荐**:Manager 与 Runner 容器均以非 root(如 UID 1001)运行,避免 GitHub Runner 报「Must not run with sudo」。

internal/runner/container.go

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,22 @@ func containerNotFound(out []byte) bool {
105105
return false
106106
}
107107

108+
// containerStartUnrecoverable 判断 docker start 失败是否因网络已删除等导致无法恢复,需删容器后重建
109+
func containerStartUnrecoverable(out []byte) bool {
110+
s := string(out)
111+
lower := strings.ToLower(s)
112+
if strings.Contains(lower, "network") && (strings.Contains(lower, "not found") || strings.Contains(lower, "no such")) {
113+
return true
114+
}
115+
if strings.Contains(lower, "could not find network") || strings.Contains(lower, "could not attach to network") {
116+
return true
117+
}
118+
if strings.Contains(lower, "failed to create endpoint") || strings.Contains(lower, "failed to get network") {
119+
return true
120+
}
121+
return false
122+
}
123+
108124
// dockerPermissionDenied 判断是否为访问 Docker 权限/连接错误(宿主机 socket 需对 Manager 容器可访问)
109125
func dockerPermissionDenied(out []byte) bool {
110126
s := string(out)
@@ -180,12 +196,18 @@ func StartRunnerContainer(ctx context.Context, cfg *config.Config, runnerName, i
180196
return dockerCmdError("docker ps", out, err)
181197
}
182198
if len(strings.TrimSpace(string(out))) > 0 {
183-
out, err := dockerCmd(ctx, "start", cn)
184-
if err != nil {
185-
return dockerCmdError("docker start", out, err)
199+
startOut, startErr := dockerCmd(ctx, "start", cn)
200+
if startErr == nil {
201+
time.Sleep(2 * time.Second)
202+
return CallAgentStart(ctx, cn, cfg.Runners.AgentPort)
203+
}
204+
// start 失败且为网络已删除等不可恢复原因时,删除旧容器并走下方「创建新容器」流程(如 compose down 后网络被删)
205+
if containerStartUnrecoverable(startOut) {
206+
_, _ = dockerCmd(ctx, "rm", "-f", cn)
207+
// fall through to create new container
208+
} else {
209+
return dockerCmdError("docker start", startOut, startErr)
186210
}
187-
time.Sleep(2 * time.Second)
188-
return CallAgentStart(ctx, cn, cfg.Runners.AgentPort)
189211
}
190212
// 创建新容器
191213
// 容器模式下若 Manager 在容器内(base_path 通常为 /app/runners),未设置 volume_host_path 会导致 docker create -v 使用容器内路径,宿主机上无效

0 commit comments

Comments
 (0)