Skip to content

Commit 7b853d0

Browse files
committed
gossip_store: don't make bogus assumption that writes are atomic wrt readers.
They're not defined to be, though we've not seen this on Linux (testing showed that it is page-level atomic, which means it can still happen across page boundaries though!). This was pointed out by whitslack in #4288 In practice, this just means not complaining when it happens, and also not trying to get tricky to use it on MacOS (we can safely seek & write, since we're single-threaded). Signed-off-by: Rusty Russell <[email protected]> Changelog-Removed: Removed bogus UNUSUAL log about gossip_store 'short test'.
1 parent cbde1f8 commit 7b853d0

File tree

2 files changed

+10
-27
lines changed

2 files changed

+10
-27
lines changed

common/gossip_store.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ static bool timestamp_filter(const struct per_peer_state *pps, u32 timestamp)
5858
&& timestamp <= pps->gs->timestamp_max;
5959
}
6060

61-
static void undo_read(int fd, int len, size_t wanted)
61+
/* Not all the data we expected was there: rewind file */
62+
static void failed_read(int fd, int len)
6263
{
6364
if (len < 0) {
6465
/* Grab errno before lseek overrides it */
@@ -68,10 +69,6 @@ static void undo_read(int fd, int len, size_t wanted)
6869
(u64)lseek(fd, 0, SEEK_CUR), err);
6970
}
7071

71-
/* Shouldn't happen, but some filesystems are not as atomic as
72-
* they should be! */
73-
status_unusual("gossip_store: short read %i of %zu @%"PRIu64,
74-
len, wanted, (u64)lseek(fd, 0, SEEK_CUR) - len);
7572
lseek(fd, -len, SEEK_CUR);
7673
}
7774

@@ -93,7 +90,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
9390
if (r != sizeof(hdr)) {
9491
/* We expect a 0 read here at EOF */
9592
if (r != 0)
96-
undo_read(pps->gossip_store_fd, r, sizeof(hdr));
93+
failed_read(pps->gossip_store_fd, r);
9794
per_peer_state_reset_gossip_timer(pps);
9895
return NULL;
9996
}
@@ -116,7 +113,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
116113
msg = tal_arr(ctx, u8, msglen);
117114
r = read(pps->gossip_store_fd, msg, msglen);
118115
if (r != msglen) {
119-
undo_read(pps->gossip_store_fd, r, msglen);
116+
failed_read(pps->gossip_store_fd, r);
120117
per_peer_state_reset_gossip_timer(pps);
121118
return NULL;
122119
}

gossipd/gossip_store.c

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ static void gossip_store_destroy(struct gossip_store *gs)
6262
}
6363

6464
#if HAVE_PWRITEV
65+
/* One fewer syscall for the win! */
6566
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
6667
off_t offset)
6768
{
@@ -71,25 +72,9 @@ static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
7172
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
7273
off_t offset)
7374
{
74-
u8 *buf;
75-
size_t len;
76-
ssize_t ret;
77-
78-
/* Make a temporary linear buffer to fall back to pwrite() */
79-
len = 0;
80-
for (size_t i = 0; i < iovcnt; i++)
81-
len += iov[i].iov_len;
82-
83-
buf = tal_arr(NULL, u8, len);
84-
len = 0;
85-
for (size_t i = 0; i < iovcnt; i++) {
86-
memcpy(buf + len, iov[i].iov_base, iov[i].iov_len);
87-
len += iov[i].iov_len;
88-
}
89-
90-
ret = pwrite(fd, buf, len, offset);
91-
tal_free(buf);
92-
return ret;
75+
if (lseek(fd, offset, SEEK_SET) != offset)
76+
return -1;
77+
return writev(fd, iov, iovcnt);
9378
}
9479
#endif /* !HAVE_PWRITEV */
9580

@@ -107,7 +92,8 @@ static bool append_msg(int fd, const u8 *msg, u32 timestamp,
10792
hdr.crc = cpu_to_be32(crc32c(timestamp, msg, msglen));
10893
hdr.timestamp = cpu_to_be32(timestamp);
10994

110-
/* Use pwritev so it will appear in store atomically */
95+
/* pwritev makes it more likely to appear at once, plus it's
96+
* exactly what we want. */
11197
iov[0].iov_base = &hdr;
11298
iov[0].iov_len = sizeof(hdr);
11399
iov[1].iov_base = (void *)msg;

0 commit comments

Comments
 (0)