Skip to content

Commit 37d4837

Browse files
committed
make slursearch.py
various changes to slursearch and pretend i didnt do this with 30 commits increase limit cap Update slursearch.py brackets default to public channels only FUCJKING NEWLINES FUCKING INDENTS investigate command not working init cog properly i thought it was automatic missed a single whitespace line fuck i hate flake8 i hate flake8 i hate flake8 Create slursearch.py
1 parent 4aa2285 commit 37d4837

2 files changed

Lines changed: 382 additions & 1 deletion

File tree

cogs/slursearch.py

Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,380 @@
1+
import asyncio
2+
import re
3+
import unicodedata
4+
import json
5+
from dataclasses import dataclass
6+
from typing import Iterable, List, Set
7+
8+
import discord
9+
from discord.ext import commands
10+
from utils.checks import is_staff, check_staff
11+
12+
13+
def normalize_text(s: str) -> str:
    """Return *s* in a canonical form suitable for pattern matching.

    The text is first NFKC-normalized, which folds Unicode compatibility
    variants (full-width letters, ligatures, ...) into their plain
    equivalents, and is then lower-cased.
    """
    compat_folded = unicodedata.normalize("NFKC", s)
    return compat_folded.lower()
15+
16+
17+
def chunked(seq: List[str], n: int) -> Iterable[List[str]]:
    """Yield consecutive slices of *seq*, each holding at most *n* items.

    The final slice may be shorter than *n*; an empty *seq* yields nothing.

    Raises:
        ValueError: if *n* is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no chunks at all, so reject both here.
    if n < 1:
        raise ValueError(f"chunk size must be a positive integer, got {n!r}")
    for start in range(0, len(seq), n):
        yield seq[start:start + n]
20+
21+
22+
@dataclass
class Hit:
    """One message that matched the audit pattern."""

    # ID of the channel the message was found in.
    channel_id: int
    # Display name of that channel at scan time.
    channel_name: str
    # ID of the matching message.
    message_id: int
    # ID of the message author.
    author_id: int
    # str() of the author object, as shown in the report.
    author_tag: str
    # Message creation time as an ISO-8601 string with second precision.
    created_at_iso: str
    # Link that jumps straight to the message.
    jump_url: str
    # Single-line preview of the message content, truncated for display.
    excerpt: str
32+
33+
34+
class SlurAudit(commands.Cog):
    """Staff-only audit cog that scans channel history for regex matches.

    A scan is started with the ``slursearch`` command. While it runs, the
    channel it was started from acts as a control channel: the
    ``on_message`` listener accepts ``.slurprogress pause``,
    ``.slurprogress resume``, ``.slurprogress abort`` and a status query
    there. Only one scan can run at a time.
    """

    # Post a progress message every time this many more messages were read.
    _PROGRESS_EVERY = 5000
    # Discord rejects message content longer than 2000 characters; packing
    # to a slightly lower budget leaves headroom.
    _MESSAGE_BUDGET = 1900
    # Upper bound on hit entries placed in a single detail message.
    _HITS_PER_MESSAGE = 5
    # Texts the listener treats as a status query in the control channel.
    _STATUS_COMMANDS = (".slursearch status", ".slurprogress status")
    # Reports echo user-written text; never let that ping anyone.
    _NO_PINGS = discord.AllowedMentions.none()

    class _ScanAborted(RuntimeError):
        """Raised inside a running scan once an abort has been requested."""

    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Held for the whole duration of a scan; doubles as "scan running".
        self._scan_lock = asyncio.Lock()
        # Set = running, cleared = paused.
        self._pause_event = asyncio.Event()
        # Set = the running scan should stop at its next checkpoint.
        self._abort_event = asyncio.Event()
        self._pause_event.set()
        self._control_channel_id: int | None = None
        self._control_guild_id: int | None = None
        self._scan_started_at: str | None = None

    async def _honor_pause_abort(self):
        """Checkpoint: block while paused, raise ``_ScanAborted`` on abort."""
        if self._abort_event.is_set():
            raise self._ScanAborted("Scan aborted.")
        await self._pause_event.wait()
        # An abort may have arrived while we were paused.
        if self._abort_event.is_set():
            raise self._ScanAborted("Scan aborted.")

    def _is_superop_or_owner(self, guild: discord.Guild, user_id: int) -> bool:
        """Return True if the user has the SuperOP rank or owns *guild*."""
        try:
            if check_staff(self.bot, "SuperOP", user_id):
                return True
        except Exception:
            # Deliberate best effort: if the staff lookup fails for any
            # reason, fall back to the guild-owner check below.
            pass
        return bool(guild and user_id == guild.owner_id)

    def _accepts_control(self, message: discord.Message) -> bool:
        """Return True if *message* may steer the currently running scan.

        That requires a running scan, a non-bot author, a message posted in
        the recorded control guild and channel, and an author who is SuperOP
        or the guild owner.
        """
        if not self._scan_lock.locked():
            return False
        if message.author.bot or not message.guild:
            return False
        if self._control_channel_id is None or self._control_guild_id is None:
            return False
        if message.guild.id != self._control_guild_id:
            return False
        if message.channel.id != self._control_channel_id:
            return False
        return self._is_superop_or_owner(message.guild, message.author.id)

    def _status_text(self) -> str:
        """Describe the current scan state for a status reply."""
        state = "running"
        if self._abort_event.is_set():
            state = "aborting"
        elif not self._pause_event.is_set():
            state = "paused"
        return (
            f"Scan status: **{state}**\n"
            f"Started: **{self._scan_started_at or 'unknown'}**"
        )

    @commands.Cog.listener()
    async def on_message(self, message: discord.Message):
        """Handle pause/resume/abort/status texts in the control channel."""
        if not self._accepts_control(message):
            return

        content = (message.content or "").strip().lower()

        if content == ".slurprogress pause":
            self._pause_event.clear()
            await message.channel.send("Scan paused.")
            return

        if content == ".slurprogress resume":
            self._pause_event.set()
            await message.channel.send("Scan resumed.")
            return

        if content == ".slurprogress abort":
            self._abort_event.set()
            # Wake a paused scan so it can notice the abort.
            self._pause_event.set()
            await message.channel.send("Abort requested. Scan will stop shortly.")
            return

        if content in self._STATUS_COMMANDS:
            await message.channel.send(self._status_text())
            return

    @commands.guild_only()
    @is_staff("SuperOP")
    @commands.command()
    async def slursearch(
        self,
        ctx: commands.Context,
        *,
        args: str = ""
    ):
        """Scan channel history for messages by given users matching a regex.

        Arguments are key=value pairs: ids (comma-separated user IDs) and
        regex are required; limit_per_channel, page, delay, include_threads
        and public_only are optional. Use "status" as the only argument to
        ask about a running scan.
        """
        if args.strip().lower() == "status":
            # While a scan runs, the listener already answers the exact
            # status texts in the control channel; replying here as well
            # would post twice (and used to post the usage text).
            typed = (ctx.message.content or "").strip().lower()
            answered_by_listener = (
                typed in self._STATUS_COMMANDS
                and self._accepts_control(ctx.message)
            )
            if not answered_by_listener:
                if self._scan_lock.locked():
                    await ctx.send(self._status_text())
                else:
                    await ctx.send("No scan is running.")
            return

        kv = self._parse_kv(args)
        raw_ids = kv.get("ids")
        raw_regex = kv.get("regex")

        if not raw_ids or not raw_regex:
            await ctx.send(
                "Missing required args. Example:\n"
                '`.slursearch ids=123,456 regex="\\b(example)\\b" limit_per_channel=5000 page=500 include_threads=no delay=0.25 public_only=yes`'
            )
            return

        # isdecimal() (unlike isdigit()) only accepts what int() can parse.
        target_ids: Set[int] = {
            int(part)
            for part in raw_ids.replace(" ", "").split(",")
            if part.isdecimal()
        }

        if not target_ids:
            await ctx.send("No valid numeric IDs parsed from ids=...")
            return

        include_threads = kv.get("include_threads", "no").lower() in ("yes", "true", "1", "y")
        public_only = kv.get("public_only", "yes").lower() not in ("no", "false", "0", "n")

        try:
            page = int(kv.get("page", "500"))
            limit_per_channel = int(kv.get("limit_per_channel", "5000"))
            delay = float(kv.get("delay", "0.25"))
        except ValueError:
            await ctx.send(
                "page and limit_per_channel must be whole numbers and delay must be a number."
            )
            return

        try:
            pattern = re.compile(raw_regex, re.IGNORECASE)
        except re.error as e:
            await ctx.send(f"Regex compile error: {e}")
            return

        # Clamp user-supplied tuning values to sane bounds.
        page = max(50, min(page, 500))
        limit_per_channel = max(100, min(limit_per_channel, 600000))
        delay = max(0.0, min(delay, 5.0))

        guild: discord.Guild = ctx.guild

        if self._scan_lock.locked():
            await ctx.send("A scan is already running. Try again later.")
            return

        self._abort_event.clear()
        self._pause_event.set()
        self._control_channel_id = ctx.channel.id
        self._control_guild_id = guild.id
        self._scan_started_at = discord.utils.utcnow().isoformat(timespec="seconds")

        try:
            async with self._scan_lock:
                await ctx.send(
                    f"Starting audit scan.\n"
                    f"- guild: **{guild.name}**\n"
                    f"- targets: **{len(target_ids)}** user IDs\n"
                    f"- regex: `{raw_regex}`\n"
                    f"- include_threads: **{include_threads}**\n"
                    f"- per-channel cap: **{limit_per_channel}** messages\n"
                    f"- page: **{page}**\n"
                    f"- delay: **{delay}** seconds\n"
                    f"- public_only: **{public_only}**\n"
                    f"- started: **{self._scan_started_at}**",
                    allowed_mentions=self._NO_PINGS,
                )

                me = guild.get_member(self.bot.user.id) or await guild.fetch_member(self.bot.user.id)
                channels_to_scan, skipped_channels = self._select_channels(
                    guild,
                    me,
                    public_only=public_only,
                    include_threads=include_threads,
                )

                hits: List[Hit] = []
                scanned_channels = 0
                scanned_messages = 0
                last_progress_report = 0

                for channel in channels_to_scan:
                    await self._honor_pause_abort()

                    scanned_channels += 1
                    channel_name = getattr(channel, "name", f"chan-{channel.id}")

                    seen = 0
                    try:
                        async for msg in channel.history(limit=limit_per_channel, oldest_first=False):
                            seen += 1
                            scanned_messages += 1

                            if scanned_messages - last_progress_report >= self._PROGRESS_EVERY:
                                last_progress_report = scanned_messages
                                await ctx.send(
                                    f"Progress: scanned **{scanned_channels}** channels, "
                                    f"**{scanned_messages}** messages, hits **{len(hits)}**"
                                )

                            if msg.author and msg.author.id in target_ids:
                                content = msg.content or ""
                                if pattern.search(normalize_text(content)):
                                    hits.append(self._make_hit(channel.id, channel_name, msg, content))

                            # Throttle and offer a pause/abort checkpoint
                            # once per "page" of messages.
                            if seen % page == 0:
                                await self._honor_pause_abort()
                                await asyncio.sleep(delay)

                        await self._honor_pause_abort()
                        await asyncio.sleep(delay)

                    except self._ScanAborted:
                        # Must come before the broad handler below, which
                        # would otherwise swallow the abort and report it as
                        # an unexpected error.
                        raise
                    except discord.Forbidden:
                        skipped_channels += 1
                    except json.JSONDecodeError:
                        await ctx.send(
                            "Warning: JSON decode error (likely transient HTTP issue). "
                            "Skipping the rest of this channel."
                        )
                        await asyncio.sleep(min(5.0, max(1.0, delay)))
                    except discord.HTTPException as e:
                        await ctx.send(f"HTTPException: {e.status}")
                        await asyncio.sleep(2.0)
                    except Exception as e:
                        # Keep the scan going for the remaining channels.
                        await ctx.send(f"Unexpected error: {type(e).__name__}: {e}")
                        await asyncio.sleep(2.0)

                await self._send_report(
                    ctx,
                    hits,
                    scanned_channels=scanned_channels,
                    skipped_channels=skipped_channels,
                    scanned_messages=scanned_messages,
                )

        except self._ScanAborted:
            await ctx.send("Scan aborted.")
            return

        finally:
            # Always leave the cog ready for the next scan.
            self._control_channel_id = None
            self._control_guild_id = None
            self._abort_event.clear()
            self._pause_event.set()
            self._scan_started_at = None

    def _select_channels(self, guild, me, *, public_only: bool, include_threads: bool):
        """Pick the channels to scan; return ``(channels, skipped_count)``.

        A text channel is skipped when the bot cannot view it or read its
        history, or, with *public_only*, when the default role cannot view
        it. With *include_threads*, the threads listed on each selected
        channel are scanned too (private ones are skipped under
        *public_only*).
        """
        # NOTE(review): only threads exposed by TextChannel.threads are
        # covered; archived threads would need a separate fetch.
        everyone = guild.default_role
        selected: List[discord.abc.Messageable] = []
        skipped = 0

        for ch in guild.text_channels:
            bot_perms = ch.permissions_for(me)
            if not (bot_perms.view_channel and bot_perms.read_message_history):
                skipped += 1
                continue

            if public_only and not ch.permissions_for(everyone).view_channel:
                skipped += 1
                continue

            selected.append(ch)

            if include_threads:
                for thread in ch.threads:
                    if public_only and thread.is_private():
                        skipped += 1
                        continue
                    selected.append(thread)

        return selected, skipped

    @staticmethod
    def _make_hit(channel_id: int, channel_name: str, msg, content: str) -> Hit:
        """Build a ``Hit`` for *msg* with a one-line, length-capped excerpt."""
        excerpt = content.replace("\n", " ").strip()
        if len(excerpt) > 180:
            excerpt = excerpt[:177] + "..."
        return Hit(
            channel_id=channel_id,
            channel_name=channel_name,
            message_id=msg.id,
            author_id=msg.author.id,
            author_tag=str(msg.author),
            created_at_iso=msg.created_at.isoformat(timespec="seconds"),
            jump_url=msg.jump_url,
            excerpt=excerpt,
        )

    @staticmethod
    def _pack_messages(entries, sep: str, limit: int = _MESSAGE_BUDGET, max_items=None) -> List[str]:
        """Greedily join *entries* with *sep* into bodies of at most *limit* chars.

        *max_items*, when given, additionally caps the entries per body. An
        entry longer than *limit* is truncated so that it still fits.
        """
        bodies: List[str] = []
        current: List[str] = []
        current_len = 0

        for entry in entries:
            entry = entry[:limit]
            cost = len(entry) + (len(sep) if current else 0)
            at_item_cap = max_items is not None and len(current) >= max_items
            if current and (at_item_cap or current_len + cost > limit):
                bodies.append(sep.join(current))
                current = []
                current_len = 0
                cost = len(entry)
            current.append(entry)
            current_len += cost

        if current:
            bodies.append(sep.join(current))
        return bodies

    async def _send_report(self, ctx, hits, *, scanned_channels, skipped_channels, scanned_messages):
        """Post the final summary and, if there are hits, the per-hit details.

        Raises ``_ScanAborted`` if an abort is requested while the detail
        messages are being posted.
        """
        finished = discord.utils.utcnow().isoformat(timespec="seconds")
        stats = (
            f"Audit complete.\n"
            f"- scanned channels: **{scanned_channels}** (skipped **{skipped_channels}**)\n"
            f"- scanned messages: **{scanned_messages}**\n"
        )

        if not hits:
            await ctx.send(
                stats
                + "- hits: **0**\n"
                + f"- finished: **{finished}**"
            )
            return

        by_channel: dict[int, List[Hit]] = {}
        for h in hits:
            by_channel.setdefault(h.channel_id, []).append(h)

        # Channels with the most hits first; ties keep first-seen order.
        ranked = sorted(by_channel.values(), key=len, reverse=True)
        summary_lines = [f"- #{group[0].channel_name}: **{len(group)}**" for group in ranked]

        entries = [
            stats
            + f"- total hits: **{len(hits)}**\n"
            + f"- finished: **{finished}**\n\n"
            + "Hits by channel:"
        ]
        entries.extend(summary_lines[:30])
        if len(summary_lines) > 30:
            entries.append(f"...and {len(summary_lines) - 30} more channels.")

        for body in self._pack_messages(entries, "\n"):
            await ctx.send(body, allowed_mentions=self._NO_PINGS)

        detail_lines = [
            f"[{h.created_at_iso}] **{h.author_tag}** in **#{h.channel_name}**: {h.jump_url}\n"
            f"> {h.excerpt}"
            for h in hits
        ]

        for body in self._pack_messages(detail_lines, "\n\n", max_items=self._HITS_PER_MESSAGE):
            await self._honor_pause_abort()
            # Excerpts are raw user text and may contain mentions.
            await ctx.send(body, allowed_mentions=self._NO_PINGS)
            await asyncio.sleep(0.5)

    def _parse_kv(self, s: str) -> dict:
        """Parse ``key=value`` tokens from *s* into a dict.

        Tokens are separated by whitespace, except inside single or double
        quotes. Keys are lower-cased; a value wrapped in a matching pair of
        quotes has them removed. Tokens without ``=`` are ignored and a
        repeated key keeps its last value. No escape sequences are
        interpreted.
        """
        tokens: List[str] = []
        current = ""
        open_quote = ""  # the quote character we are inside of, if any

        for ch in s:
            if ch in ("'", '"'):
                if not open_quote:
                    open_quote = ch
                elif open_quote == ch:
                    open_quote = ""
                # Quote characters stay in the token; they are stripped
                # from the value further down.
                current += ch
            elif ch.isspace() and not open_quote:
                if current:
                    tokens.append(current)
                    current = ""
            else:
                current += ch
        if current:
            tokens.append(current)

        out = {}
        for tok in tokens:
            if "=" not in tok:
                continue
            key, value = tok.split("=", 1)
            key = key.strip().lower()
            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
                value = value[1:-1]
            out[key] = value

        return out

    @slursearch.error
    async def slursearch_error(self, ctx: commands.Context, error: Exception):
        """Report a failure of ``slursearch`` to the invoking channel.

        Staff-rank and other check failures get a short notice; anything
        else is reported and then re-raised.
        """
        if isinstance(error, commands.CommandInvokeError) and getattr(error, "original", None):
            error = error.original

        # Matched by name because the exception class is not imported here.
        if type(error).__name__ == "InsufficientStaffRank":
            await ctx.send(str(error))
            return

        if isinstance(error, commands.CheckFailure):
            await ctx.send(f"Blocked: `{type(error).__name__}`")
            return

        await ctx.send(f"Error: `{type(error).__name__}: {error}`")
        raise error
377+
378+
379+
async def setup(bot: commands.Bot):
    """Create a ``SlurAudit`` cog and add it to *bot*."""
    await bot.add_cog(SlurAudit(bot))

kurisu.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@
6565
'cogs.newcomers',
6666
'cogs.server_logs',
6767
'cogs.soap',
68-
'cogs.titletxtparse'
68+
'cogs.titletxtparse',
69+
'cogs.slursearch'
6970
)
7071

7172
DEBUG = False

0 commit comments

Comments
 (0)