Skip to content

Commit 8980842

Browse files
nybidarigvisor-bot
authored andcommitted
Start TCP dispatchers after endpoints are completely loaded during restore.
- During restore if endpoints' states are not loaded before starting dispatchers, then the incoming packets will be queued and the dispatcher will try to send the packets, which will result in a panic as the endpoint might still be in an invalid state (eg: initial). - To avoid this, wait for all the endpoints to be loaded completely before starting the dispatchers by calling AsycnLoading.Wait() in stack.Restore() and remove this from kernel.LoadFrom() - Added a test to check save restore of multiple(100) listening sockets. PiperOrigin-RevId: 762010964
1 parent ab9666d commit 8980842

File tree

5 files changed

+209
-42
lines changed

5 files changed

+209
-42
lines changed

images/basic/integrationtest/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ RUN gcc -O2 -o test_sticky test_sticky.c
1515
RUN gcc -O2 -o host_fd host_fd.c
1616
RUN gcc -O2 -o host_connect host_connect.c
1717
RUN gcc -O2 -o tcp_server tcp_server.c
18+
RUN gcc -O2 -o tcp_stress_server tcp_stress_server.c -pthread
1819

1920
# Add nonprivileged regular user named "nonroot".
2021
RUN groupadd --gid 1337 nonroot && \
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// Copyright 2025 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// This test validates the checkpoint/restore functionality for approximately
16+
// 100 listening sockets operating in network=sandbox mode. It will create the
17+
// sockets, accept all incoming connections, and then a checkpoint will be
18+
// initiated from the other end. After restore, the test verifies that these
19+
// listening sockets are correctly re-established and new client connections can
20+
// be made.
21+
22+
#include <errno.h>
23+
#include <netinet/in.h>
24+
#include <pthread.h>
25+
#include <stdio.h>
26+
#include <stdlib.h>
27+
#include <string.h>
28+
#include <sys/socket.h>
29+
#include <unistd.h>
30+
31+
#define NUM_SOCKETS 100
32+
#define START_PORT 9000
33+
#define BACKLOG 10
34+
35+
// Structure to pass arguments to the listener thread
36+
typedef struct {
37+
int port;
38+
int listen_fd;
39+
} listener_arg_t;
40+
41+
void *listener_thread(void *arg) {
42+
listener_arg_t *listener_arg = (listener_arg_t *)arg;
43+
int port = listener_arg->port;
44+
int listen_fd = listener_arg->listen_fd;
45+
46+
while (1) {
47+
printf("Listener on port %d started. Waiting for connection...\n", port);
48+
int client_fd = accept(listen_fd, NULL, NULL);
49+
if (errno == EINTR) {
50+
continue;
51+
}
52+
if (client_fd < 0) {
53+
perror("accept");
54+
exit(EXIT_FAILURE);
55+
}
56+
printf("Accepted connection on port %d\n", port);
57+
close(client_fd);
58+
}
59+
}
60+
61+
int main() {
62+
struct sockaddr_in server_addr;
63+
int listen_fds[NUM_SOCKETS];
64+
pthread_t listener_threads[NUM_SOCKETS];
65+
int active_listeners = 0;
66+
int i;
67+
68+
// Initialize listen_fds with -1 to indicate no socket yet
69+
for (i = 0; i < NUM_SOCKETS; ++i) {
70+
listen_fds[i] = -1;
71+
}
72+
73+
printf("Attempting to create %d listening sockets starting from port %d...\n",
74+
NUM_SOCKETS, START_PORT);
75+
76+
for (i = 0; i < NUM_SOCKETS; ++i) {
77+
int port = START_PORT + i;
78+
int listen_fd = socket(AF_INET, SOCK_STREAM, 0);
79+
if (listen_fd < 0) {
80+
perror("socket failed");
81+
exit(EXIT_FAILURE);
82+
}
83+
84+
memset(&server_addr, 0, sizeof(server_addr));
85+
server_addr.sin_family = AF_INET;
86+
server_addr.sin_addr.s_addr = INADDR_ANY;
87+
server_addr.sin_port = htons(port);
88+
89+
if (bind(listen_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) <
90+
0) {
91+
perror("bind failed");
92+
exit(EXIT_FAILURE);
93+
}
94+
if (listen(listen_fd, BACKLOG) < 0) {
95+
perror("listen failed");
96+
exit(EXIT_FAILURE);
97+
}
98+
printf("Successfully listening on port %d (FD: %d)\n", port, listen_fd);
99+
100+
listen_fds[active_listeners] = listen_fd;
101+
active_listeners++;
102+
103+
// Allocate memory for the argument to pass to the thread
104+
listener_arg_t *arg = (listener_arg_t *)malloc(sizeof(listener_arg_t));
105+
if (arg == NULL) {
106+
close(listen_fd);
107+
active_listeners--;
108+
exit(EXIT_FAILURE);
109+
}
110+
arg->port = port;
111+
arg->listen_fd = listen_fd;
112+
113+
if (pthread_create(&listener_threads[i], NULL, listener_thread,
114+
(void *)arg) != 0) {
115+
perror("pthread_create failed");
116+
close(listen_fd);
117+
free(arg);
118+
active_listeners--;
119+
exit(EXIT_FAILURE);
120+
}
121+
}
122+
123+
// Wait for all threads to finish
124+
for (int i = 0; i < NUM_SOCKETS; i++) {
125+
if (pthread_join(listener_threads[i], NULL) != 0) {
126+
perror("failed to join thread");
127+
exit(EXIT_FAILURE);
128+
}
129+
}
130+
printf("Program finished.\n");
131+
return 0;
132+
}

pkg/sentry/kernel/kernel.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ import (
7979
"gvisor.dev/gvisor/pkg/sentry/vfs"
8080
"gvisor.dev/gvisor/pkg/state"
8181
"gvisor.dev/gvisor/pkg/sync"
82-
"gvisor.dev/gvisor/pkg/tcpip"
8382
)
8483

8584
// IOUringEnabled is set to true when IO_URING is enabled. Added as a global to
@@ -806,8 +805,6 @@ func (k *Kernel) LoadFrom(ctx context.Context, r io.Reader, loadMFs bool, timeRe
806805
return vfs.PrependErrMsg("vfs.CompleteRestore() failed", err)
807806
}
808807

809-
tcpip.AsyncLoading.Wait()
810-
811808
log.Infof("Overall load took [%s] after async work", time.Since(loadStart))
812809

813810
// Applications may size per-cpu structures based on k.applicationCores, so

pkg/tcpip/stack/stack.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,6 +2009,11 @@ func (s *Stack) Restore() {
20092009
for _, e := range eps {
20102010
e.Restore(s)
20112011
}
2012+
2013+
// Make sure all the endpoints are loaded correctly before resuming the
2014+
// protocol level background workers.
2015+
tcpip.AsyncLoading.Wait()
2016+
20122017
// Now resume any protocol level background workers.
20132018
for _, p := range s.transportProtocols {
20142019
if saveRestoreEnabled {

test/e2e/integration_test.go

Lines changed: 71 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,37 +1223,64 @@ func TestCheckpointResume(t *testing.T) {
12231223
}
12241224
}
12251225

1226-
func testCheckpointRestoreListeningConnection(ctx context.Context, t *testing.T, d *dockerutil.Container) {
1226+
func connectWithTCPServer(t *testing.T, serverIP string, startPort int, numConn int) {
1227+
const timeout = 1 * time.Minute
1228+
var conn net.Conn
1229+
var err error
1230+
1231+
for i := range numConn {
1232+
ip := serverIP + ":" + strconv.Itoa(startPort+i)
1233+
for {
1234+
conn, err = net.DialTimeout("tcp", ip, timeout)
1235+
// Retry to connect to the server if there are any errors.
1236+
// Connect can fail sometimes when the server is not ready
1237+
// and is not in listening state when the request was sent.
1238+
if err != nil {
1239+
t.Logf("Error connecting to server %s: %v, retrying...", ip, err)
1240+
continue
1241+
}
1242+
break
1243+
}
1244+
conn.Close()
1245+
}
1246+
}
1247+
1248+
func connectAndReadWrite(t *testing.T, serverIP string, port int) {
1249+
newserverIP := serverIP + ":" + strconv.Itoa(port)
1250+
newConn, err := net.DialTimeout("tcp", newserverIP, 1*time.Minute)
1251+
if err != nil {
1252+
t.Fatalf("Error connecting to server: %v", err)
1253+
}
1254+
defer newConn.Close()
1255+
1256+
readBuf := make([]byte, 32)
1257+
if _, err := newConn.Read(readBuf); err != nil {
1258+
t.Fatalf("Read failed: %v", err)
1259+
}
1260+
1261+
if _, err := newConn.Write([]byte("Hello!")); err != nil {
1262+
t.Fatalf("Write failed: %v", err)
1263+
}
1264+
}
1265+
1266+
func testCheckpointRestoreListeningConnection(ctx context.Context, t *testing.T, d *dockerutil.Container, fName string, numConn int) {
12271267
defer d.CleanUp(ctx)
12281268

1229-
const port = 9000
12301269
opts := dockerutil.RunOpts{
12311270
Image: "basic/integrationtest",
12321271
}
1233-
12341272
// Start the tcp server.
1235-
if err := d.Spawn(ctx, opts, "./tcp_server"); err != nil {
1273+
if err := d.Spawn(ctx, opts, fName); err != nil {
12361274
t.Fatalf("docker run failed: %v", err)
12371275
}
12381276

1239-
var (
1240-
ip net.IP
1241-
err error
1242-
conn net.Conn
1243-
)
1244-
ip, err = d.FindIP(ctx, false)
1277+
ip, err := d.FindIP(ctx, false)
12451278
if err != nil {
12461279
t.Fatalf("docker.FindIP failed: %v", err)
12471280
}
1248-
serverIP := ip.String() + ":" + strconv.Itoa(port)
1249-
const timeout = 1 * time.Minute
1250-
for {
1251-
conn, err = net.DialTimeout("tcp", serverIP, timeout)
1252-
if err == nil {
1253-
break
1254-
}
1255-
}
1256-
conn.Close()
1281+
1282+
const port = 9000
1283+
connectWithTCPServer(t, ip.String(), port, numConn)
12571284

12581285
// Create a snapshot.
12591286
const checkpointFile = "networktest"
@@ -1265,30 +1292,20 @@ func testCheckpointRestoreListeningConnection(ctx context.Context, t *testing.T,
12651292
}
12661293
d.RestoreInTest(ctx, t, checkpointFile)
12671294

1268-
var (
1269-
newIP net.IP
1270-
newConn net.Conn
1271-
)
1295+
var newIP net.IP
12721296
newIP, err = d.FindIP(ctx, false)
12731297
if err != nil {
12741298
t.Fatalf("docker.FindIP failed: %v", err)
12751299
}
1276-
newserverIP := newIP.String() + ":" + strconv.Itoa(port)
1277-
newConn, err = net.DialTimeout("tcp", newserverIP, timeout)
1278-
if err != nil {
1279-
t.Fatalf("Error connecting to server: %v", err)
1280-
}
1281-
defer newConn.Close()
1282-
1283-
readBuf := make([]byte, 32)
1284-
if _, err := newConn.Read(readBuf); err != nil {
1285-
t.Fatalf("Read failed: %v", err)
1286-
}
1287-
1288-
if _, err := newConn.Write([]byte("Hello!")); err != nil {
1289-
t.Fatalf("Write failed: %v", err)
1300+
if numConn > 1 {
1301+
connectWithTCPServer(t, newIP.String(), port, numConn)
1302+
if err := d.Kill(ctx); err != nil {
1303+
t.Fatalf("Wait failed: %v", err)
1304+
}
1305+
return
12901306
}
12911307

1308+
connectAndReadWrite(t, newIP.String(), port)
12921309
if err := d.Wait(ctx); err != nil {
12931310
t.Fatalf("Wait failed: %v", err)
12941311
}
@@ -1303,7 +1320,7 @@ func TestRestoreListenConn(t *testing.T) {
13031320

13041321
ctx := context.Background()
13051322
d := dockerutil.MakeContainer(ctx, t)
1306-
testCheckpointRestoreListeningConnection(ctx, t, d)
1323+
testCheckpointRestoreListeningConnection(ctx, t, d, "./tcp_server" /* fName */, 1 /* numConn */)
13071324
}
13081325

13091326
// Test to check restore of a TCP listening connection with netstack S/R.
@@ -1318,5 +1335,20 @@ func TestRestoreListenConnWithNetstackSR(t *testing.T) {
13181335

13191336
ctx := context.Background()
13201337
d := dockerutil.MakeContainerWithRuntime(ctx, t, "-TESTONLY-save-restore-netstack")
1321-
testCheckpointRestoreListeningConnection(ctx, t, d)
1338+
testCheckpointRestoreListeningConnection(ctx, t, d, "./tcp_server" /* fName */, 1 /* numConn */)
1339+
}
1340+
1341+
// Test to check restore of multiple TCP listening connections with netstack S/R.
1342+
func TestRestoreMultipleListenConnWithNetstackSR(t *testing.T) {
1343+
if !testutil.IsCheckpointSupported() {
1344+
t.Skip("Checkpoint is not supported.")
1345+
}
1346+
if !testutil.IsRunningWithSaveRestoreNetstack() {
1347+
t.Skip("Netstack save restore is not supported.")
1348+
}
1349+
dockerutil.EnsureDockerExperimentalEnabled()
1350+
1351+
ctx := context.Background()
1352+
d := dockerutil.MakeContainerWithRuntime(ctx, t, "-TESTONLY-save-restore-netstack")
1353+
testCheckpointRestoreListeningConnection(ctx, t, d, "./tcp_stress_server" /* fName */, 100 /* numConn */)
13221354
}

0 commit comments

Comments
 (0)