Skip to content

Commit f94d542

Browse files
Marc Dionne authored and brauner committed
afs: Fix possible infinite loop with unresponsive servers
A return code of 0 from afs_wait_for_one_fs_probe is an indication that the endpoint state attached to the operation is stale and has been superseded. In that case the iteration needs to be restarted so that the newer probe result state gets used.

Failure to do so can result in a tight infinite loop around the iterate_address label, where all addresses are thought to be responsive and have been tried, with nothing to refresh the endpoint state.

Fixes: 495f2ae ("afs: Fix fileserver rotation")
Reported-by: Markus Suvanto <[email protected]>
Link: https://lists.infradead.org/pipermail/linux-afs/2024-July/008628.html
cc: [email protected]
Signed-off-by: Marc Dionne <[email protected]>
Signed-off-by: David Howells <[email protected]>
Link: https://lore.kernel.org/r/[email protected]/
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
1 parent 8a46067 commit f94d542

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

fs/afs/fs_probe.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,10 +506,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta
506506
finish_wait(&server->probe_wq, &wait);
507507

508508
dont_wait:
509-
if (estate->responsive_set & ~exclude)
510-
return 1;
511509
if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
512510
return 0;
511+
if (estate->responsive_set & ~exclude)
512+
return 1;
513513
if (is_intr && signal_pending(current))
514514
return -ERESTARTSYS;
515515
if (timo == 0)

fs/afs/rotate.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,10 @@ bool afs_select_fileserver(struct afs_operation *op)
632632
wait_for_more_probe_results:
633633
error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
634634
!(op->flags & AFS_OPERATION_UNINTR));
635-
if (!error)
635+
if (error == 1)
636636
goto iterate_address;
637+
if (!error)
638+
goto restart_from_beginning;
637639

638640
/* We've now had a failure to respond on all of a server's addresses -
639641
* immediately probe them again and consider retrying the server.
@@ -644,10 +646,13 @@ bool afs_select_fileserver(struct afs_operation *op)
644646
error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
645647
!(op->flags & AFS_OPERATION_UNINTR));
646648
switch (error) {
647-
case 0:
649+
case 1:
648650
op->flags &= ~AFS_OPERATION_RETRY_SERVER;
649-
trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
651+
trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
650652
goto retry_server;
653+
case 0:
654+
trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
655+
goto restart_from_beginning;
651656
case -ERESTARTSYS:
652657
afs_op_set_error(op, error);
653658
goto failed;

0 commit comments

Comments
 (0)