diff --git a/packages/sandbox/daemon/setup/clone.ts b/packages/sandbox/daemon/setup/clone.ts index c160bf56ad..18fca8bc79 100644 --- a/packages/sandbox/daemon/setup/clone.ts +++ b/packages/sandbox/daemon/setup/clone.ts @@ -47,6 +47,13 @@ const TRANSIENT_ERRORS = [ "unexpected disconnect", "Connection reset by peer", "Connection timed out", + // libcurl CURLE_OPERATION_TIMEDOUT triggered by http.lowSpeedLimit/Time — + // fires when the egress NAT silently drops in-flight packets (e.g. fck-nat + // ASG instance refresh) and the stream stalls below the threshold. + "Operation too slow", + "transfer closed with", + "RPC failed", + "the remote end hung up", ]; const CLONE_MAX_RETRIES = 3; const CLONE_RETRY_DELAY_MS = 3000; diff --git a/packages/sandbox/image/Dockerfile b/packages/sandbox/image/Dockerfile index ae4f9bdebf..eda3712795 100644 --- a/packages/sandbox/image/Dockerfile +++ b/packages/sandbox/image/Dockerfile @@ -44,6 +44,13 @@ RUN pip3 install --break-system-packages --no-cache-dir \ ENV LANG=en_US.UTF-8 \ LC_ALL=en_US.UTF-8 +# Convert silent stalls (NAT instance replacement, PMTUD blackholes, mid-stream +# packet drops) into fast errors that the daemon's clone retry loop can catch. +# Without this, libcurl waits on TCP keepalive (~2h default) and the clone +# hangs at "Receiving objects" indefinitely. +RUN git config --system http.lowSpeedLimit 1000 \ + && git config --system http.lowSpeedTime 30 + # Non-root sandbox user. The bun image comes with a 'bun' user (UID 1000), # but we drop privileges further by replacing it with a 'sandbox' user. RUN userdel --remove bun \ diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json index 119dfb0ead..39270e8805 100644 --- a/packages/sandbox/package.json +++ b/packages/sandbox/package.json @@ -1,6 +1,6 @@ { "name": "@decocms/sandbox", - "version": "0.4.5", + "version": "0.4.6", "type": "module", "description": "Sandbox runner for isolated per-user containerised tool execution", "scripts": {