Skip to content

Commit 9ddd5d8

Browse files
hpk42 authored and link2xt committed
Replace expiry "find" commands with a new chatmaild.expire python module + a reporting one
1 parent 4cfe228 commit 9ddd5d8

File tree

14 files changed

+558
-67
lines changed

14 files changed

+558
-67
lines changed

ARCHITECTURE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ graph LR;
1919
/var/lib/acme`")] --> nginx-internal;
2020
cron --- chatmail-metrics;
2121
cron --- acmetool;
22-
cron --- expunge;
2322
chatmail-metrics --- website;
2423
acmetool --> certs[("`TLS certs
2524
/var/lib/acme`")];
@@ -35,7 +34,8 @@ graph LR;
3534
dovecot --- users;
3635
dovecot --- |metadata.socket|chatmail-metadata;
3736
doveauth --- users;
38-
expunge --- users;
37+
chatmail-expire-daily --- users;
38+
chatmail-fsreport-daily --- users;
3939
chatmail-metadata --- iroh-relay;
4040
certs-nginx --> postfix;
4141
certs-nginx --> dovecot;

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@
5050
- Add `--skip-dns-check` argument to `cmdeploy run` command, which disables DNS record checking before installation.
5151
([#661](https://github.com/chatmail/relay/pull/661))
5252

53+
- Rework expiry of message files and mailboxes in Python
54+
to only do a single iteration over sometimes millions of messages
55+
instead of doing "find" commands that iterate 9 times over the messages.
56+
Provide an "fsreport" CLI for more fine grained analysis of message files.
57+
([#637](https://github.com/chatmail/relay/pull/632))
58+
59+
5360
## 1.7.0 2025-09-11
5461

5562
- Make www upload path configurable

chatmaild/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ chatmail-metadata = "chatmaild.metadata:main"
2727
filtermail = "chatmaild.filtermail:main"
2828
echobot = "chatmaild.echo:main"
2929
chatmail-metrics = "chatmaild.metrics:main"
30-
delete_inactive_users = "chatmaild.delete_inactive_users:main"
30+
chatmail-expire = "chatmaild.expire:main"
31+
chatmail-fsreport = "chatmaild.fsreport:main"
3132
lastlogin = "chatmaild.lastlogin:main"
3233
turnserver = "chatmaild.turnserver:main"
3334

chatmaild/src/chatmaild/delete_inactive_users.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

chatmaild/src/chatmaild/expire.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
"""
2+
Expire old messages and addresses.
3+
4+
"""
5+
6+
import os
7+
import shutil
8+
import sys
9+
import time
10+
from argparse import ArgumentParser
11+
from collections import namedtuple
12+
from datetime import datetime
13+
from stat import S_ISREG
14+
15+
from chatmaild.config import read_config
16+
17+
# Record for one scanned file: its path relative to the mailbox directory,
# plus the modification time and size reported by os.stat().
FileEntry = namedtuple("FileEntry", ("relpath", "mtime", "size"))
18+
19+
20+
def iter_mailboxes(basedir, maxnum):
    """Lazily yield a MailboxStat for mailbox directories below *basedir*.

    Only the first *maxnum* directory entries are considered (``None`` means
    no limit); the limit is applied before filtering, so fewer than *maxnum*
    mailboxes may be produced.  Entries whose name does not contain "@" are
    ignored.  If *basedir* does not exist a note is printed and nothing is
    yielded.
    """
    if os.path.exists(basedir):
        candidates = os.listdir(basedir)[:maxnum]
        for entry in candidates:
            if "@" not in entry:
                continue
            yield MailboxStat("/".join((basedir, entry)))
    else:
        print_info(f"no mailboxes found at: {basedir}")
28+
29+
30+
class MailboxStat:
    """Snapshot of the files found in one mailbox directory.

    Attributes:
        basedir: the mailbox directory as a string.
        messages: FileEntry items for every entry in the ``cur``, ``new`` and
            ``tmp`` sub-directories; ``relpath`` is relative to ``basedir``.
        extrafiles: FileEntry items for regular files directly inside
            ``basedir``, sorted by size, largest first.
        last_login: mtime of the regular file named ``password`` in
            ``basedir``, or ``None`` if there is no such file.

    The scan does not change the process working directory, and entries that
    disappear between listing and stat-ing (or dangling symlinks) are skipped
    instead of aborting the whole scan.
    """

    last_login = None

    def __init__(self, basedir):
        self.basedir = str(basedir)
        # all detected messages in cur/new/tmp folders
        self.messages = []

        # all detected files in mailbox top dir
        self.extrafiles = []

        # scan all relevant files (without recursion)
        for name in os.listdir(self.basedir):
            path = os.path.join(self.basedir, name)
            if name in ("cur", "new", "tmp"):
                self._scan_messages(name, path)
                continue
            st = self._stat_or_none(path)
            if st is None or not S_ISREG(st.st_mode):
                continue
            self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
            if name == "password":
                self.last_login = st.st_mtime
        self.extrafiles.sort(key=lambda x: -x.size)

    def _scan_messages(self, subdir, path):
        """Record every entry of the message folder *path* under *subdir*/."""
        for msg_name in os.listdir(path):
            st = self._stat_or_none(os.path.join(path, msg_name))
            if st is None:
                continue
            relpath = subdir + "/" + msg_name
            self.messages.append(FileEntry(relpath, st.st_mtime, st.st_size))

    @staticmethod
    def _stat_or_none(path):
        """Return os.stat(path), or None if the file no longer exists."""
        try:
            return os.stat(path)
        except FileNotFoundError:
            return None
58+
59+
60+
def print_info(msg):
    """Write *msg* followed by a newline to standard error."""
    destination = sys.stderr
    print(msg, file=destination)
62+
63+
64+
class Expiry:
    """Apply the expiry policy from the config to scanned mailboxes.

    The config object must provide ``delete_inactive_users_after``,
    ``delete_mails_after`` and ``delete_large_after``; each is converted with
    ``int()`` and interpreted as a number of days relative to *now*.

    Args:
        config: object carrying the three settings named above.
        dry: if true, nothing is deleted; counters and verbose output still
            reflect what would have been removed.
        now: reference time as seconds since the epoch.
        verbose: if true, print every mailbox/file that is (or would be)
            removed to stderr.

    Counters ``all_mboxes``, ``del_mboxes``, ``all_files`` and ``del_files``
    are updated by the methods and reported by ``get_summary()``.
    """

    # messages strictly larger than this size (as reported by the scan) are
    # subject to the shorter "delete_large_after" retention
    large_message_size = 200000

    def __init__(self, config, dry, now, verbose):
        self.config = config
        self.dry = dry
        self.now = now
        self.verbose = verbose
        self.del_mboxes = 0
        self.all_mboxes = 0
        self.del_files = 0
        self.all_files = 0
        self.start = time.time()

    def remove_mailbox(self, mboxdir):
        """Remove the directory tree *mboxdir* (unless dry) and count it."""
        if self.verbose:
            print_info(f"removing {mboxdir}")
        if not self.dry:
            try:
                shutil.rmtree(mboxdir)
            except FileNotFoundError:
                # same tolerance as remove_file(): the mailbox may have been
                # removed by someone else since it was listed
                print_info(f"mailbox not found/vanished {mboxdir}")
        self.del_mboxes += 1

    def remove_file(self, path):
        """Unlink *path* (unless dry) and count it; a missing file is only reported."""
        if self.verbose:
            print_info(f"removing {path}")
        if not self.dry:
            try:
                os.unlink(path)
            except FileNotFoundError:
                print_info(f"file not found/vanished {path}")
        self.del_files += 1

    def process_mailbox_stat(self, mbox):
        """Expire one scanned mailbox.

        *mbox* must provide ``basedir``, ``last_login`` and ``messages``
        (items with ``relpath``, ``mtime`` and ``size``).

        If ``last_login`` is set and older than the inactivity cutoff, the
        whole mailbox is removed.  Otherwise each message older than the
        mail cutoff is removed, as is each message in ``cur/`` that is larger
        than ``large_message_size`` and older than the large-mail cutoff.
        When at least one message was removed, the ``maildirsize`` file is
        removed too (this is also counted in ``del_files``).

        The process working directory is restored before returning.
        """
        day = 86400
        cutoff_without_login = (
            self.now - int(self.config.delete_inactive_users_after) * day
        )
        cutoff_mails = self.now - int(self.config.delete_mails_after) * day
        cutoff_large_mails = self.now - int(self.config.delete_large_after) * day

        self.all_mboxes += 1
        if mbox.last_login and mbox.last_login < cutoff_without_login:
            self.remove_mailbox(mbox.basedir)
            return

        # all to-be-removed files are relative to the mailbox basedir;
        # restore the previous cwd afterwards so that callers (and relative
        # mailbox paths processed later) are not affected
        old_cwd = os.getcwd()
        os.chdir(mbox.basedir)
        try:
            mboxname = os.path.basename(mbox.basedir)
            if self.verbose:
                print_info(f"checking for mailbox messages in: {mboxname}")
            self.all_files += len(mbox.messages)
            changed = False
            for message in mbox.messages:
                if message.mtime < cutoff_mails:
                    expired = True
                else:
                    # we only remove noticed large files (not unnoticed ones in new/)
                    expired = (
                        message.size > self.large_message_size
                        and message.mtime < cutoff_large_mails
                        and message.relpath.startswith("cur/")
                    )
                if expired:
                    self.remove_file(message.relpath)
                    changed = True
            if changed:
                self.remove_file("maildirsize")
        finally:
            os.chdir(old_cwd)

    def get_summary(self):
        """Return a one-line, human-readable summary of the counters and elapsed time."""
        return (
            f"Removed {self.del_mboxes} out of {self.all_mboxes} mailboxes "
            f"and {self.del_files} out of {self.all_files} files in existing mailboxes "
            f"in {time.time() - self.start:2.2f} seconds"
        )
131+
132+
133+
def main(args=None):
    """Expire mailboxes and messages according to chatmail config"""
    # NOTE: the docstring above doubles as the argparse description, so it is
    # kept to a single line.  *args* is the argument list to parse; None lets
    # argparse fall back to sys.argv[1:].  Nothing is deleted unless
    # --remove is given.
    parser = ArgumentParser(description=main.__doc__)
    ini = "/usr/local/lib/chatmaild/chatmail.ini"
    parser.add_argument(
        "chatmail_ini",
        action="store",
        nargs="?",
        help=f"path pointing to chatmail.ini file, default: {ini}",
        default=ini,
    )
    parser.add_argument(
        "--days",
        action="store",
        type=int,
        help="assume date to be days older than now",
    )

    parser.add_argument(
        "--maxnum",
        default=None,
        action="store",
        type=int,
        help="maximum number of mailboxes to iterate on",
    )
    parser.add_argument(
        "-v",
        dest="verbose",
        action="store_true",
        help="print out removed files and mailboxes",
    )

    parser.add_argument(
        "--remove",
        dest="remove",
        action="store_true",
        help="actually remove all expired files and dirs",
    )
    args = parser.parse_args(args)

    config = read_config(args.chatmail_ini)
    # File mtimes are seconds since the epoch, so compare against time.time().
    # (datetime.utcnow().timestamp() would interpret the naive UTC value as
    # local time and be off by the host's UTC offset.)
    now = time.time()
    if args.days is not None:
        now = now - 86400 * args.days

    # "--maxnum 0" keeps meaning "process no mailboxes"; only an absent
    # option means "no limit"
    maxnum = args.maxnum
    exp = Expiry(config, dry=not args.remove, now=now, verbose=args.verbose)
    for mailbox in iter_mailboxes(str(config.mailboxes_dir), maxnum=maxnum):
        exp.process_mailbox_stat(mailbox)
    print(exp.get_summary())
179+
180+
181+
# Allow running this module directly as a script, passing the command-line
# arguments (without the program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])

0 commit comments

Comments
 (0)