Skip to content

Commit 0c59ae1

Browse files
committed
Merge tag 'afs-fix-rotation-20240105' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull afs updates from David Howells: "The majority of the patches are aimed at fixing and improving the AFS filesystem's rotation over server IP addresses, but there are also some fixes from Oleg Nesterov for the use of read_seqbegin_or_lock(). - Fix fileserver probe handling so that the next round of probes doesn't break ongoing server/address rotation by clearing all the probe result tracking. This could occasionally cause the rotation algorithm to drop straight through and give a 'successful' result without actually emitting any RPC calls, leaving the reply buffer in an undefined state. Instead, detach the probe results into a separate struct and allocate a new one each time we start probing and update the pointer to it. Probes are also sent in order of address preference to try and improve the chance that the preferred one will complete first. - Fix server rotation so that it uses configurable address preferences across the probes that have completed so far rather than ranking them by RTT, as the latter doesn't necessarily give the best route. The preference list can be altered by writing into /proc/net/afs/addr_prefs. - Fix the handling of Read-Only (and Backup) volume callbacks as there is one per volume, not one per file, so if someone performs a command that, say, offlines the volume but doesn't change it, when it comes back online we don't spam the server with a status fetch for every vnode we're using. Instead, check the Creation timestamp in the VolSync record when prompted by a callback break. - Handle volume regression (i.e. a RW volume being restored from a backup) by scrubbing all cache data for that volume. This is detected from the VolSync creation timestamp. - Adjust abort handling and abort -> error mapping to match better with what other AFS clients do. - Fix offline and busy volume state handling as they only apply to individual server instances and not entire volumes and the rotation algorithm should go and look at other servers if available. 
Also make it sleep briefly before each retry if all the volume instances are unavailable" * tag 'afs-fix-rotation-20240105' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: (40 commits) afs: trace: Log afs_make_call(), including server address afs: Fix offline and busy message emission afs: Fix fileserver rotation afs: Overhaul invalidation handling to better support RO volumes afs: Parse the VolSync record in the reply of a number of RPC ops afs: Don't leave DONTUSE/NEWREPSITE servers out of server list afs: Fix comment in afs_do_lookup() afs: Apply server breaks to mmap'd files in the call processor afs: Move the vnode/volume validity checking code into its own file afs: Defer volume record destruction to a workqueue afs: Make it possible to find the volumes that are using a server afs: Combine the endpoint state bools into a bitmask afs: Keep a record of the current fileserver endpoint state afs: Dispatch vlserver probes in priority order afs: Dispatch fileserver probes in priority order afs: Mark address lists with configured priorities afs: Provide a way to configure address priorities afs: Remove the unimplemented afs_cmp_addr_list() afs: Add some more info to /proc/net/afs/servers rxrpc: Create a procfile to display outstanding client conn bundles ...
2 parents 032500a + abcbd3b commit 0c59ae1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3544
-1692
lines changed

fs/afs/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
kafs-y := \
77
addr_list.o \
8+
addr_prefs.o \
89
callback.o \
910
cell.o \
1011
cmservice.o \
@@ -27,6 +28,7 @@ kafs-y := \
2728
server.o \
2829
server_list.o \
2930
super.o \
31+
validation.o \
3032
vlclient.o \
3133
vl_alias.o \
3234
vl_list.o \

fs/afs/addr_list.c

Lines changed: 92 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,55 @@
1313
#include "internal.h"
1414
#include "afs_fs.h"
1515

16+
static void afs_free_addrlist(struct rcu_head *rcu)
17+
{
18+
struct afs_addr_list *alist = container_of(rcu, struct afs_addr_list, rcu);
19+
unsigned int i;
20+
21+
for (i = 0; i < alist->nr_addrs; i++)
22+
rxrpc_kernel_put_peer(alist->addrs[i].peer);
23+
trace_afs_alist(alist->debug_id, refcount_read(&alist->usage), afs_alist_trace_free);
24+
kfree(alist);
25+
}
26+
1627
/*
1728
* Release an address list.
1829
*/
19-
void afs_put_addrlist(struct afs_addr_list *alist)
30+
void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
31+
{
32+
unsigned int debug_id;
33+
bool dead;
34+
int r;
35+
36+
if (!alist)
37+
return;
38+
debug_id = alist->debug_id;
39+
dead = __refcount_dec_and_test(&alist->usage, &r);
40+
trace_afs_alist(debug_id, r - 1, reason);
41+
if (dead)
42+
call_rcu(&alist->rcu, afs_free_addrlist);
43+
}
44+
45+
struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
2046
{
21-
if (alist && refcount_dec_and_test(&alist->usage))
22-
kfree_rcu(alist, rcu);
47+
int r;
48+
49+
if (alist) {
50+
__refcount_inc(&alist->usage, &r);
51+
trace_afs_alist(alist->debug_id, r + 1, reason);
52+
}
53+
return alist;
2354
}
2455

2556
/*
2657
* Allocate an address list.
2758
*/
28-
struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
29-
unsigned short service,
30-
unsigned short port)
59+
struct afs_addr_list *afs_alloc_addrlist(unsigned int nr)
3160
{
3261
struct afs_addr_list *alist;
33-
unsigned int i;
62+
static atomic_t debug_id;
3463

35-
_enter("%u,%u,%u", nr, service, port);
64+
_enter("%u", nr);
3665

3766
if (nr > AFS_MAX_ADDRESSES)
3867
nr = AFS_MAX_ADDRESSES;
@@ -43,17 +72,8 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
4372

4473
refcount_set(&alist->usage, 1);
4574
alist->max_addrs = nr;
46-
47-
for (i = 0; i < nr; i++) {
48-
struct sockaddr_rxrpc *srx = &alist->addrs[i];
49-
srx->srx_family = AF_RXRPC;
50-
srx->srx_service = service;
51-
srx->transport_type = SOCK_DGRAM;
52-
srx->transport_len = sizeof(srx->transport.sin6);
53-
srx->transport.sin6.sin6_family = AF_INET6;
54-
srx->transport.sin6.sin6_port = htons(port);
55-
}
56-
75+
alist->debug_id = atomic_inc_return(&debug_id);
76+
trace_afs_alist(alist->debug_id, 1, afs_alist_trace_alloc);
5777
return alist;
5878
}
5979

@@ -126,7 +146,7 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
126146
if (!vllist->servers[0].server)
127147
goto error_vl;
128148

129-
alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
149+
alist = afs_alloc_addrlist(nr);
130150
if (!alist)
131151
goto error;
132152

@@ -197,9 +217,11 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
197217
}
198218

199219
if (family == AF_INET)
200-
afs_merge_fs_addr4(alist, x[0], xport);
220+
ret = afs_merge_fs_addr4(net, alist, x[0], xport);
201221
else
202-
afs_merge_fs_addr6(alist, x, xport);
222+
ret = afs_merge_fs_addr6(net, alist, x, xport);
223+
if (ret < 0)
224+
goto error;
203225

204226
} while (p < end);
205227

@@ -216,25 +238,12 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
216238
problem, p - text, (int)len, (int)len, text);
217239
ret = -EINVAL;
218240
error:
219-
afs_put_addrlist(alist);
241+
afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
220242
error_vl:
221243
afs_put_vlserverlist(net, vllist);
222244
return ERR_PTR(ret);
223245
}
224246

225-
/*
226-
* Compare old and new address lists to see if there's been any change.
227-
* - How to do this in better than O(Nlog(N)) time?
228-
* - We don't really want to sort the address list, but would rather take the
229-
* list as we got it so as not to undo record rotation by the DNS server.
230-
*/
231-
#if 0
232-
static int afs_cmp_addr_list(const struct afs_addr_list *a1,
233-
const struct afs_addr_list *a2)
234-
{
235-
}
236-
#endif
237-
238247
/*
239248
* Perform a DNS query for VL servers and build up an address list.
240249
*/
@@ -271,25 +280,33 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
271280
/*
272281
* Merge an IPv4 entry into a fileserver address list.
273282
*/
274-
void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
283+
int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist,
284+
__be32 xdr, u16 port)
275285
{
276-
struct sockaddr_rxrpc *srx;
277-
u32 addr = ntohl(xdr);
286+
struct sockaddr_rxrpc srx;
287+
struct rxrpc_peer *peer;
278288
int i;
279289

280290
if (alist->nr_addrs >= alist->max_addrs)
281-
return;
291+
return 0;
282292

283-
for (i = 0; i < alist->nr_ipv4; i++) {
284-
struct sockaddr_in *a = &alist->addrs[i].transport.sin;
285-
u32 a_addr = ntohl(a->sin_addr.s_addr);
286-
u16 a_port = ntohs(a->sin_port);
293+
srx.srx_family = AF_RXRPC;
294+
srx.transport_type = SOCK_DGRAM;
295+
srx.transport_len = sizeof(srx.transport.sin);
296+
srx.transport.sin.sin_family = AF_INET;
297+
srx.transport.sin.sin_port = htons(port);
298+
srx.transport.sin.sin_addr.s_addr = xdr;
287299

288-
if (addr == a_addr && port == a_port)
289-
return;
290-
if (addr == a_addr && port < a_port)
291-
break;
292-
if (addr < a_addr)
300+
peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
301+
if (!peer)
302+
return -ENOMEM;
303+
304+
for (i = 0; i < alist->nr_ipv4; i++) {
305+
if (peer == alist->addrs[i].peer) {
306+
rxrpc_kernel_put_peer(peer);
307+
return 0;
308+
}
309+
if (peer <= alist->addrs[i].peer)
293310
break;
294311
}
295312

@@ -298,107 +315,50 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
298315
alist->addrs + i,
299316
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
300317

301-
srx = &alist->addrs[i];
302-
srx->srx_family = AF_RXRPC;
303-
srx->transport_type = SOCK_DGRAM;
304-
srx->transport_len = sizeof(srx->transport.sin);
305-
srx->transport.sin.sin_family = AF_INET;
306-
srx->transport.sin.sin_port = htons(port);
307-
srx->transport.sin.sin_addr.s_addr = xdr;
318+
alist->addrs[i].peer = peer;
308319
alist->nr_ipv4++;
309320
alist->nr_addrs++;
321+
return 0;
310322
}
311323

312324
/*
313325
* Merge an IPv6 entry into a fileserver address list.
314326
*/
315-
void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
327+
int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist,
328+
__be32 *xdr, u16 port)
316329
{
317-
struct sockaddr_rxrpc *srx;
318-
int i, diff;
330+
struct sockaddr_rxrpc srx;
331+
struct rxrpc_peer *peer;
332+
int i;
319333

320334
if (alist->nr_addrs >= alist->max_addrs)
321-
return;
335+
return 0;
322336

323-
for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
324-
struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
325-
u16 a_port = ntohs(a->sin6_port);
337+
srx.srx_family = AF_RXRPC;
338+
srx.transport_type = SOCK_DGRAM;
339+
srx.transport_len = sizeof(srx.transport.sin6);
340+
srx.transport.sin6.sin6_family = AF_INET6;
341+
srx.transport.sin6.sin6_port = htons(port);
342+
memcpy(&srx.transport.sin6.sin6_addr, xdr, 16);
326343

327-
diff = memcmp(xdr, &a->sin6_addr, 16);
328-
if (diff == 0 && port == a_port)
329-
return;
330-
if (diff == 0 && port < a_port)
331-
break;
332-
if (diff < 0)
344+
peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
345+
if (!peer)
346+
return -ENOMEM;
347+
348+
for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
349+
if (peer == alist->addrs[i].peer) {
350+
rxrpc_kernel_put_peer(peer);
351+
return 0;
352+
}
353+
if (peer <= alist->addrs[i].peer)
333354
break;
334355
}
335356

336357
if (i < alist->nr_addrs)
337358
memmove(alist->addrs + i + 1,
338359
alist->addrs + i,
339360
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
340-
341-
srx = &alist->addrs[i];
342-
srx->srx_family = AF_RXRPC;
343-
srx->transport_type = SOCK_DGRAM;
344-
srx->transport_len = sizeof(srx->transport.sin6);
345-
srx->transport.sin6.sin6_family = AF_INET6;
346-
srx->transport.sin6.sin6_port = htons(port);
347-
memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
361+
alist->addrs[i].peer = peer;
348362
alist->nr_addrs++;
349-
}
350-
351-
/*
352-
* Get an address to try.
353-
*/
354-
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
355-
{
356-
unsigned long set, failed;
357-
int index;
358-
359-
if (!ac->alist)
360-
return false;
361-
362-
set = ac->alist->responded;
363-
failed = ac->alist->failed;
364-
_enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
365-
366-
ac->nr_iterations++;
367-
368-
set &= ~(failed | ac->tried);
369-
370-
if (!set)
371-
return false;
372-
373-
index = READ_ONCE(ac->alist->preferred);
374-
if (test_bit(index, &set))
375-
goto selected;
376-
377-
index = __ffs(set);
378-
379-
selected:
380-
ac->index = index;
381-
set_bit(index, &ac->tried);
382-
ac->responded = false;
383-
return true;
384-
}
385-
386-
/*
387-
* Release an address list cursor.
388-
*/
389-
int afs_end_cursor(struct afs_addr_cursor *ac)
390-
{
391-
struct afs_addr_list *alist;
392-
393-
alist = ac->alist;
394-
if (alist) {
395-
if (ac->responded &&
396-
ac->index != alist->preferred &&
397-
test_bit(ac->alist->preferred, &ac->tried))
398-
WRITE_ONCE(alist->preferred, ac->index);
399-
afs_put_addrlist(alist);
400-
ac->alist = NULL;
401-
}
402-
403-
return ac->error;
363+
return 0;
404364
}

0 commit comments

Comments
 (0)