Skip to content

Commit df927ac

Browse files
authored
Merge pull request #350 from Dstack-TEE/wg-reconnect
cvm: Auto reconnect when wg gets stuck
2 parents df0d608 + 16a2148 commit df927ac

File tree

5 files changed

+258
-168
lines changed

5 files changed

+258
-168
lines changed

basefiles/wg-checker.service

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[Unit]
22
Description=WireGuard Endpoint Checker Service
3-
After=network-online.target tboot.service
3+
After=network-online.target dstack-prepare.service
44
Wants=network-online.target
55

66
[Service]

basefiles/wg-checker.sh

100644 → 100755
Lines changed: 58 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,27 +4,73 @@
44
#
55
# SPDX-License-Identifier: Apache-2.0
66

7-
get_conf_endpoint() {
8-
grep "Endpoint" /etc/wireguard/wg0.conf | awk "{print \$3}"
7+
HANDSHAKE_TIMEOUT=180
8+
LAST_REFRESH=0
9+
STALE_SINCE=0
10+
DSTACK_WORK_DIR=${DSTACK_WORK_DIR:-/dstack}
11+
IFNAME=dstack-wg0
12+
13+
get_latest_handshake() {
14+
wg show $IFNAME latest-handshakes 2>/dev/null | awk 'BEGIN { max = 0 } NF >= 2 { if ($2 > max) max = $2 } END { print max }'
915
}
1016

11-
get_current_endpoint() {
12-
wg show wg0 endpoints | awk "{print \$2}"
17+
maybe_refresh() {
18+
now=$1
19+
20+
if [ "$LAST_REFRESH" -ne 0 ] && [ $((now - LAST_REFRESH)) -lt $HANDSHAKE_TIMEOUT ]; then
21+
return
22+
fi
23+
24+
if ! command -v dstack-util >/dev/null 2>&1; then
25+
printf 'dstack-util not found; cannot refresh gateway.\n' >&2
26+
LAST_REFRESH=$now
27+
return
28+
fi
29+
30+
printf 'WireGuard handshake stale; refreshing dstack gateway...\n'
31+
if dstack-util gateway-refresh --work-dir "$DSTACK_WORK_DIR"; then
32+
printf 'dstack gateway refresh succeeded.\n'
33+
else
34+
printf 'dstack gateway refresh failed.\n' >&2
35+
fi
36+
37+
LAST_REFRESH=$now
38+
STALE_SINCE=$now
1339
}
1440

15-
check_endpoint() {
16-
CONF_ENDPOINT=$(get_conf_endpoint)
17-
CURRENT_ENDPOINT=$(get_current_endpoint)
41+
check_handshake() {
42+
if ! command -v wg >/dev/null 2>&1; then
43+
return
44+
fi
45+
46+
now=$(date +%s)
47+
latest=$(get_latest_handshake)
48+
49+
if [ -z "$latest" ]; then
50+
latest=0
51+
fi
1852

19-
if [ "$CURRENT_ENDPOINT" != "$CONF_ENDPOINT" ]; then
20-
echo "Wg endpoint changed from $CONF_ENDPOINT to $CURRENT_ENDPOINT."
21-
wg syncconf wg0 <(wg-quick strip wg0)
53+
if [ "$latest" -gt 0 ]; then
54+
if [ $((now - latest)) -ge $HANDSHAKE_TIMEOUT ]; then
55+
maybe_refresh "$now"
56+
else
57+
STALE_SINCE=0
58+
fi
59+
else
60+
if [ "$STALE_SINCE" -eq 0 ]; then
61+
STALE_SINCE=$now
62+
fi
63+
if [ $((now - STALE_SINCE)) -ge $HANDSHAKE_TIMEOUT ]; then
64+
maybe_refresh "$now"
65+
fi
2266
fi
2367
}
2468

2569
while true; do
26-
if [ -f /etc/wireguard/wg0.conf ]; then
27-
check_endpoint
70+
if [ -f /etc/wireguard/$IFNAME.conf ]; then
71+
check_handshake
72+
else
73+
STALE_SINCE=0
2874
fi
2975
sleep 10
3076
done

dstack-util/src/main.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ use std::{
2424
io::{self, Read, Write},
2525
path::PathBuf,
2626
};
27-
use system_setup::{cmd_sys_setup, SetupArgs};
27+
use system_setup::{cmd_gateway_refresh, cmd_sys_setup, GatewayRefreshArgs, SetupArgs};
2828
use tdx_attest as att;
2929
use utils::AppKeys;
3030

@@ -64,6 +64,8 @@ enum Commands {
6464
Rand(RandArgs),
6565
/// Prepare dstack system.
6666
Setup(SetupArgs),
67+
/// Refresh the dstack gateway configuration
68+
GatewayRefresh(GatewayRefreshArgs),
6769
/// Notify the host about the dstack app
6870
NotifyHost(HostNotifyArgs),
6971
/// Remove orphaned containers
@@ -533,6 +535,9 @@ async fn main() -> Result<()> {
533535
Commands::Setup(args) => {
534536
cmd_sys_setup(args).await?;
535537
}
538+
Commands::GatewayRefresh(args) => {
539+
cmd_gateway_refresh(args).await?;
540+
}
536541
Commands::NotifyHost(args) => {
537542
cmd_notify_host(args).await?;
538543
}

0 commit comments

Comments
 (0)