Skip to content

Commit cede0b1

Browse files
committed
net_imap: Add caching for local folder stats to speed up LIST-STATUS.
Caching is already used to cache remote mailbox sizes for proxied folders, since most IMAP servers do not support LIST-STATUS and therefore calculating the sizes of all folders can be very slow. However, caching has never previously been used for this sort of thing for local mailbox folders. Add a caching layer for the 'cur' directory in maildirs, specifically, since this is where most messages end up over time (the 'new' dir wouldn't benefit from caching). In particular, this allows us to return information for STATUS (which may be part of LIST-STATUS) without having to traverse all the files in the cur dir, if there have been no changes to it since the last time the stats were cached. For large archive folders that are rarely or never modified, this means we can eliminate many expensive and time-consuming directory traversals. When calculating stats in the cur dir, we now cache all the stats, along with the current MODSEQ and UIDNEXT values. If either has changed the next time we need them, we ignore the cached values since they are out of date. Performance benchmark for a LIST-STATUS operation in a multi-folder mailbox totaling 248,668 messages (6014 MB total): Without caching: 1128 ms With caching: 245 ms In this test, caching sped up the time for LIST-STATUS by ~80%. This allows webmail applications (e.g. mod_webmail) to load and become usable much sooner. Add tests that do not test that we are caching, per se, but do test that the correct information is returned in scenarios where cached stats would get returned.
1 parent 20b3ee0 commit cede0b1

File tree

2 files changed

+261
-1
lines changed

2 files changed

+261
-1
lines changed

nets/net_imap.c

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
#include "include/node.h"
137137
#include "include/auth.h"
138138
#include "include/user.h"
139+
#include "include/kvs.h"
139140
#include "include/test.h"
140141
#include "include/notify.h"
141142
#include "include/oauth.h"
@@ -182,6 +183,10 @@ unsigned int maxuserproxies = 10;
182183

183184
#define MAX_USER_PROXIES 32
184185

186+
/* Cache and use cached folder stats in mailboxes when unchanged.
187+
* This greatly improves performance for SELECT/STATUS/LIST-STATUS for large mailboxes, although it adds additional overhead for small folders with few messages. */
188+
#define CACHE_FOLDER_STATS
189+
185190
/* All IMAP traversals must be ordered, so we can't use these functions or we'll get a ~random (at least incorrect) order */
186191
/* Sequence numbers must strictly be in order, if they aren't, all sorts of weird stuff will happen.
187192
* I know this because I tried using these functions first, and it didn't really work.
@@ -1119,9 +1124,135 @@ static int imap_expunge(struct imap_session *imap, int silent)
11191124
return 0;
11201125
}
11211126

1127+
#ifdef CACHE_FOLDER_STATS
1128+
/*!
 * \brief Fetch a value from the key-value store and parse it as an unsigned decimal number
 * \param key Key to look up
 * \param keylen Length of key
 * \param[out] intresult If non-NULL, receives the value as an unsigned int
 * \param[out] longresult Only used if intresult is NULL: receives the value as an unsigned long
 * \retval 0 on success
 * \retval -1 if the lookup failed, no output pointer was provided,
 *            or the stored value is not a plain unsigned decimal number that fits the requested type
 */
static int imap_kvs_get_num(const char *key, size_t keylen, unsigned int *restrict intresult, unsigned long *restrict longresult)
{
	char buf[32];
	char *end;
	size_t outlen = 0;
	unsigned long val;

	if (!intresult && !longresult) {
		return -1; /* Nowhere to store the result */
	}

	/* Leave room for the NUL terminator we append below */
	if (bbs_kvs_get(key, keylen, buf, sizeof(buf) - 1, &outlen) < 0) {
		return -1;
	}
	if (outlen > sizeof(buf) - 1) {
		return -1; /* Never index past the end of buf, whatever the store reported */
	}
	buf[outlen] = '\0';

	/* strtoul silently accepts leading whitespace and a sign (negating the result),
	 * so insist on a leading digit: anything else is a corrupt entry, i.e. a cache miss. */
	if (buf[0] < '0' || buf[0] > '9') {
		return -1;
	}
	val = strtoul(buf, &end, 10);
	if (*end != '\0') {
		return -1; /* Trailing garbage */
	}

	if (intresult) {
		if ((unsigned long) (unsigned int) val != val) {
			return -1; /* Value does not fit in an unsigned int */
		}
		*intresult = (unsigned int) val;
	} else {
		*longresult = val;
	}
	return 0;
}
1144+
1145+
/* Read the cached unsigned int stored under "<path>/<keyname>" into var.
 * Relies on 'path', 'keybuf' (a char array) and 'keylen' being in the enclosing scope.
 * Makes the ENCLOSING FUNCTION return -1 if the key doesn't fit in keybuf or the value can't be read. */
#define GET_CACHED_INT(keyname, var) \
	do { \
		int cached_keyres = snprintf(keybuf, sizeof(keybuf), "%s/%s", path, keyname); \
		if (cached_keyres < 0 || (size_t) cached_keyres >= sizeof(keybuf)) { \
			return -1; /* Key was truncated (or formatting failed), don't look up the wrong key */ \
		} \
		keylen = (size_t) cached_keyres; \
		if (imap_kvs_get_num(keybuf, keylen, &(var), NULL)) { \
			return -1; \
		} \
	} while (0)
1150+
1151+
/* Read the cached unsigned long stored under "<path>/<keyname>" into var.
 * Relies on 'path', 'keybuf' (a char array) and 'keylen' being in the enclosing scope.
 * Makes the ENCLOSING FUNCTION return -1 if the key doesn't fit in keybuf or the value can't be read. */
#define GET_CACHED_LONG(keyname, var) \
	do { \
		int cached_keyres = snprintf(keybuf, sizeof(keybuf), "%s/%s", path, keyname); \
		if (cached_keyres < 0 || (size_t) cached_keyres >= sizeof(keybuf)) { \
			return -1; /* Key was truncated (or formatting failed), don't look up the wrong key */ \
		} \
		keylen = (size_t) cached_keyres; \
		if (imap_kvs_get_num(keybuf, keylen, NULL, &(var))) { \
			return -1; \
		} \
	} while (0)
1156+
1157+
/*!
 * \brief Store a number in the key-value store, as its decimal string representation
 * \param key Key to store the value under
 * \param keylen Length of key
 * \param num Number to store
 * \return Whatever bbs_kvs_put returns
 */
static int imap_kvs_put_num(const char *key, size_t keylen, unsigned long num)
{
	char digits[32]; /* Plenty of room for the decimal form of any unsigned long */
	int written = snprintf(digits, sizeof(digits), "%lu", num);

	/* Only the digits are stored, not the NUL terminator */
	return bbs_kvs_put(key, keylen, digits, (size_t) written);
}
1164+
1165+
/* Store var (converted to unsigned long) in the cache under "<path>/<keyname>".
 * Relies on 'path', 'keybuf' (a char array) and 'keylen' being in the enclosing scope.
 * Makes the ENCLOSING FUNCTION return -1 if the key doesn't fit in keybuf or the value can't be stored. */
#define SET_CACHED_NUMBER(keyname, var) \
	do { \
		int cached_keyres = snprintf(keybuf, sizeof(keybuf), "%s/%s", path, keyname); \
		if (cached_keyres < 0 || (size_t) cached_keyres >= sizeof(keybuf)) { \
			return -1; /* Key was truncated (or formatting failed), don't write to the wrong key */ \
		} \
		keylen = (size_t) cached_keyres; \
		if (imap_kvs_put_num(keybuf, keylen, (unsigned long) (var))) { \
			return -1; \
		} \
	} while (0)
1170+
1171+
static int fetch_cached_mailbox_stats(const char *path, struct imap_traversal *traversal, unsigned int *restrict actual_uidnext)
1172+
{
1173+
char keybuf[4096];
1174+
size_t keylen;
1175+
unsigned long modseq, actual_modseq;
1176+
unsigned int uidvalidity, uidnext;
1177+
1178+
/* XXX This call to mailbox_get_next_uid means that we'll end up making 2 calls to this function in a traversal */
1179+
mailbox_get_next_uid(traversal->mbox, traversal->imap->node, traversal->dir, 0, &uidvalidity, actual_uidnext);
1180+
1181+
actual_modseq = maildir_max_modseq(NULL, path); /* XXX __maildir_modseq doesn't currently use the mbox argument, but if it does in the future, we need to fix this */
1182+
if (actual_modseq <= 0) {
1183+
return 1;
1184+
}
1185+
1186+
/* Get cached MODSEQ/UIDNEXT first, so we know if all these values are up to date */
1187+
GET_CACHED_LONG("modseq", modseq);
1188+
if (modseq != actual_modseq) {
1189+
bbs_debug(5, "Actual MODSEQ %lu != cached MODSEQ %lu\n", actual_modseq, modseq);
1190+
return 1;
1191+
}
1192+
GET_CACHED_INT("uidnext", uidnext); /* Get cached MODSEQ first, so we know if all these values are up to date */
1193+
if (uidnext != *actual_uidnext) {
1194+
bbs_debug(5, "Actual UIDNEXT %u != cached UIDNEXT %u\n", *actual_uidnext, uidnext);
1195+
return 1;
1196+
}
1197+
1198+
GET_CACHED_INT("totalcur", traversal->totalcur);
1199+
GET_CACHED_LONG("totalsize", traversal->totalsize);
1200+
GET_CACHED_INT("totalunseen", traversal->totalunseen);
1201+
GET_CACHED_INT("firstunseen", traversal->firstunseen);
1202+
return 0;
1203+
}
1204+
1205+
static int cache_mailbox_stats(const char *path, struct imap_traversal *traversal, unsigned int uidnext)
1206+
{
1207+
char keybuf[4096];
1208+
size_t keylen;
1209+
unsigned long modseq;
1210+
1211+
modseq = maildir_max_modseq(NULL, path); /* XXX __maildir_modseq doesn't currently use the mbox argument, but if it does in the future, we need to fix this */
1212+
if (modseq <= 0) {
1213+
return 1;
1214+
}
1215+
1216+
/* We only cache info for the cur dir, not the new dir.
1217+
* Over time, most messages will end up in cur, only RECENT messages are ever in new. */
1218+
SET_CACHED_NUMBER("totalcur", traversal->totalcur);
1219+
SET_CACHED_NUMBER("totalsize", traversal->totalsize);
1220+
SET_CACHED_NUMBER("totalunseen", traversal->totalunseen);
1221+
SET_CACHED_NUMBER("firstunseen", traversal->firstunseen);
1222+
SET_CACHED_NUMBER("modseq", modseq); /* Update cached MODSEQ last, so that if cached values are read in the meantime by another reader, we know they are outdated. */
1223+
SET_CACHED_NUMBER("uidnext", uidnext);
1224+
return 0;
1225+
}
1226+
#endif
1227+
11221228
/*!
 * \brief Traverse the 'cur' directory of a maildir in order, using cached stats instead when they are current
 * \param path Path of the directory to traverse
 * \param on_file Callback passed through to maildir_ordered_traverse. Not invoked at all if cached stats are used.
 * \param traversal Traversal whose stats are filled in, either from the cache or by the traversal itself
 * \return 0 if cached stats were used, otherwise the return value of maildir_ordered_traverse
 */
static int imap_traverse_cur(const char *path, int (*on_file)(const char *dir_name, const char *filename, int seqno, void *obj), struct imap_traversal *traversal)
{
	int res;
#ifdef CACHE_FOLDER_STATS
	unsigned int uidnext = 0; /* Never hand an indeterminate value to cache_mailbox_stats */

	/* imap_traverse_cur is only called by the IMAP_TRAVERSAL macros, which are only called with on_select as the on_file callback,
	 * i.e. this callback is only used for SELECT and STATUS (including LIST-STATUS).
	 *
	 * The optimization here is that iterating over all existing maildir files can be quite expensive,
	 * so if we can cache results and reuse them, we do so. */
	if (!fetch_cached_mailbox_stats(path, traversal, &uidnext)) {
		bbs_debug(6, "Using cached traversal stats for %s\n", path);
		return 0;
	}

	/* Reset any fields we may have changed */
	traversal->totalcur = traversal->totalunseen = traversal->firstunseen = 0;
	traversal->totalsize = 0;
#endif

	res = maildir_ordered_traverse(path, on_file, traversal);
#ifdef CACHE_FOLDER_STATS
	if (!res) {
		/* Caching is best effort: if the stats can't be saved, we simply traverse again next time */
		(void) cache_mailbox_stats(path, traversal, uidnext);
	}
#endif
	return res;
}
11261257

11271258
static int imap_traverse_new(const char *path, int (*on_file)(const char *dir_name, const char *filename, int seqno, void *obj), struct imap_traversal *traversal)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* LBBS -- The Lightweight Bulletin Board System
3+
*
4+
* Copyright (C) 2026, Naveen Albert
5+
*
6+
* Naveen Albert <bbs@phreaknet.org>
7+
*
8+
* This program is free software, distributed under the terms of
9+
* the GNU General Public License Version 2. See the LICENSE file
10+
* at the top of the source tree.
11+
*/
12+
13+
/*! \file
14+
*
15+
* \brief IMAP Traversal Caching Tests
16+
*
17+
* \author Naveen Albert <bbs@phreaknet.org>
18+
*/
19+
20+
#include "test.h"
21+
#include "email.h"
22+
23+
static int pre(void)
24+
{
25+
test_preload_module("mod_mail.so");
26+
test_preload_module("mod_mimeparse.so");
27+
test_preload_module("net_smtp.so");
28+
test_preload_module("mod_lmdb.so");
29+
test_load_module("mod_smtp_delivery_local.so");
30+
test_load_module("net_imap.so");
31+
32+
TEST_ADD_CONFIG("mod_mail.conf");
33+
TEST_ADD_CONFIG("net_smtp.conf");
34+
TEST_ADD_CONFIG("net_imap.conf");
35+
36+
TEST_RESET_MKDIR(TEST_MAIL_DIR);
37+
return 0;
38+
}
39+
40+
static int run(void)
41+
{
42+
int clientfd = -1;
43+
int res = -1;
44+
45+
clientfd = test_make_socket(143);
46+
REQUIRE_FD(clientfd);
47+
48+
/* Connect and log in */
49+
CLIENT_EXPECT(clientfd, "OK");
50+
SWRITE(clientfd, "a1 LOGIN \"" TEST_USER "\" \"" TEST_PASS "\"" ENDL);
51+
CLIENT_EXPECT(clientfd, "a1 OK");
52+
53+
SWRITE(clientfd, "a2 SELECT \"INBOX\"" ENDL);
54+
CLIENT_EXPECT_EVENTUALLY(clientfd, "a2 OK");
55+
56+
if (test_make_messages(TEST_EMAIL, 5)) { /* Now have 5 messages in INBOX */
57+
return -1;
58+
}
59+
60+
/* Note that none of these tests here actually verify that caching is used,
61+
* i.e. the tests will (should) still pass if mod_lmdb is not loaded.
62+
* However, if caching *is* used, they do ensure it works correctly,
63+
* as if no caching were used. */
64+
65+
SWRITE(clientfd, "b1 NOOP" ENDL);
66+
CLIENT_EXPECT_EVENTUALLY(clientfd, "b1 OK"); /* Flush the untagged EXISTS/RECENT messages from message delivery */
67+
68+
SWRITE(clientfd, "c1 COPY 1:5 \"Trash\"" ENDL);
69+
CLIENT_EXPECT_EVENTUALLY(clientfd, "c1 OK"); /* Now have 5 messages in Trash */
70+
71+
SWRITE(clientfd, "c2 COPY 1:5 \"Trash\"" ENDL);
72+
CLIENT_EXPECT_EVENTUALLY(clientfd, "c2 OK"); /* Now have 10 messages in Trash */
73+
74+
SWRITE(clientfd, "c3 COPY 1:5 \"Trash\"" ENDL);
75+
CLIENT_EXPECT_EVENTUALLY(clientfd, "c3 OK"); /* Now have 15 messages in Trash */
76+
77+
SWRITE(clientfd, "d1 SELECT \"Trash\"" ENDL);
78+
CLIENT_EXPECT_EVENTUALLY(clientfd, "d1 OK");
79+
80+
SWRITE(clientfd, "d2 SELECT \"INBOX\"" ENDL);
81+
CLIENT_EXPECT_EVENTUALLY(clientfd, "5 EXISTS");
82+
83+
/* This traversal should be cached */
84+
SWRITE(clientfd, "d3 SELECT \"Trash\"" ENDL);
85+
CLIENT_EXPECT_EVENTUALLY(clientfd, "15 EXISTS");
86+
87+
SWRITE(clientfd, "d4 STORE 1 +FLAGS \\Seen" ENDL);
88+
CLIENT_EXPECT_EVENTUALLY(clientfd, "d4 OK"); /* Still have 15 messages in Trash, but only 14 unread */
89+
90+
/* This traversal should not be cached */
91+
SWRITE(clientfd, "d5 LIST (SUBSCRIBED) \"\" (\"Trash\") RETURN (STATUS (MESSAGES UNSEEN RECENT SIZE))" ENDL);
92+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* STATUS \"Trash\" (MESSAGES 15 RECENT 0 UNSEEN 14 SIZE 3270");
93+
94+
/* This traversal should be cached */
95+
SWRITE(clientfd, "d6 SELECT \"Trash\"" ENDL);
96+
CLIENT_EXPECT_EVENTUALLY(clientfd, "15 EXISTS");
97+
98+
/* As should this one */
99+
SWRITE(clientfd, "d7 LIST (SUBSCRIBED) \"\" (\"INBOX\") RETURN (STATUS (MESSAGES UNSEEN RECENT SIZE))" ENDL);
100+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* STATUS \"INBOX\" (MESSAGES 5 RECENT 0 UNSEEN 5 SIZE 1090");
101+
102+
if (test_make_messages(TEST_EMAIL, 1)) {
103+
return -1;
104+
}
105+
106+
/* This traversal is still cached, since there are only new messages (cur is unchanged) */
107+
SWRITE(clientfd, "e1 LIST (SUBSCRIBED) \"\" (\"INBOX\") RETURN (STATUS (MESSAGES UNSEEN RECENT SIZE))" ENDL);
108+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* STATUS \"INBOX\" (MESSAGES 6 RECENT 1 UNSEEN 6 SIZE 1308");
109+
110+
/* Delete a message in the trash, which will increment MODSEQ. */
111+
SWRITE(clientfd, "e2 STORE 1:2 +FLAGS \\Deleted" ENDL);
112+
SWRITE(clientfd, "e3 EXPUNGE" ENDL);
113+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* 1 EXPUNGE");
114+
115+
/* ... so this traversal should not be cached. */
116+
SWRITE(clientfd, "e4 LIST (SUBSCRIBED) \"\" (\"Trash\") RETURN (STATUS (MESSAGES UNSEEN RECENT SIZE))" ENDL);
117+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* STATUS \"Trash\" (MESSAGES 13 RECENT 0 UNSEEN 13 SIZE 2834");
118+
119+
/* LOGOUT */
120+
SWRITE(clientfd, "z1 LOGOUT" ENDL);
121+
CLIENT_EXPECT_EVENTUALLY(clientfd, "* BYE");
122+
res = 0;
123+
124+
cleanup:
125+
close_if(clientfd);
126+
return res;
127+
}
128+
129+
/* Test module declaration, with the test's display name.
 * NOTE(review): presumably this is what hooks pre() and run() into the test framework - confirm against test.h */
TEST_MODULE_INFO_STANDARD("IMAP Traversal Caching Tests");

0 commit comments

Comments
 (0)