Skip to content

Commit 60d163b

Browse files
authored
Feature/security scanners (#49)
* Initial commit for security scanners. Discord, PyPi and GitHub included. * Update schema * Update scanners and scan positions at paste creation. * Change Line Numbers to Python side. * Invalidate caches * Fix line warnings due to bleach. * Fix paste highlighting and enter behaviour on Password pastes. * Add annotation tooltip. * Slightly change annotations. * Fix line warnings so they don't deny text selection. * Only handle tokens when configured to * Fix requested changes.
1 parent 5155273 commit 60d163b

File tree

15 files changed

+314
-50
lines changed

15 files changed

+314
-50
lines changed

core/database.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import asyncio
2222
import datetime
2323
import logging
24-
import re
2524
from typing import TYPE_CHECKING, Any, Self
2625

2726
import aiohttp
@@ -31,16 +30,18 @@
3130

3231
from . import utils
3332
from .models import FileModel, PasteModel
33+
from .scanners import SecurityInfo, Services
3434

3535

3636
if TYPE_CHECKING:
3737
_Pool = asyncpg.Pool[asyncpg.Record]
3838
from types_.config import Github
3939
from types_.github import PostGist
40+
from types_.scanner import ScannerSecret
4041
else:
4142
_Pool = asyncpg.Pool
4243

43-
DISCORD_TOKEN_REGEX: re.Pattern[str] = re.compile(r"[a-zA-Z0-9_-]{23,28}\.[a-zA-Z0-9_-]{6,7}\.[a-zA-Z0-9_-]{27,}")
44+
4445
LOGGER: logging.Logger = logging.getLogger(__name__)
4546

4647

@@ -53,7 +54,7 @@ def __init__(self, *, dsn: str, session: aiohttp.ClientSession | None = None, gi
5354
self._handling_tokens = bool(self.session and github_config)
5455

5556
if self._handling_tokens:
56-
LOGGER.info("Will handle compromised discord info.")
57+
LOGGER.info("Setup to handle Discord Tokens.")
5758
assert github_config # guarded by if here
5859

5960
self._gist_token = github_config["token"]
@@ -83,20 +84,15 @@ async def _token_task(self) -> None:
8384

8485
await asyncio.sleep(self._gist_timeout)
8586

86-
def _handle_discord_tokens(self, *bodies: dict[str, str], paste_id: str) -> None:
87-
formatted_bodies = "\n".join(b["content"] for b in bodies)
88-
89-
tokens = list(DISCORD_TOKEN_REGEX.finditer(formatted_bodies))
90-
91-
if not tokens:
87+
def _handle_discord_tokens(self, tokens: list[str], paste_id: str) -> None:
88+
if not self._handling_tokens or not tokens:
9289
return
9390

9491
LOGGER.info(
9592
"Discord bot token located and added to token bucket. Current bucket size is: %s", len(self.__tokens_bucket)
9693
)
9794

98-
tokens = "\n".join([m[0] for m in tokens])
99-
self.__tokens_bucket[paste_id] = tokens
95+
self.__tokens_bucket[paste_id] = "\n".join(tokens)
10096

10197
async def _post_gist_of_tokens(self) -> None:
10298
assert self.session # guarded in caller
@@ -211,8 +207,8 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
211207
"""
212208

213209
file_query: str = """
214-
INSERT INTO files (parent_id, content, filename, loc, annotation)
215-
VALUES ($1, $2, $3, $4, $5)
210+
INSERT INTO files (parent_id, content, filename, loc, annotation, warning_positions)
211+
VALUES ($1, $2, $3, $4, $5, $6)
216212
RETURNING *
217213
"""
218214

@@ -246,28 +242,39 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
246242
name: str = (file.get("filename") or f"file_{index}")[-CONFIG["PASTES"]["name_limit"] :]
247243
name = "_".join(name.splitlines())
248244

249-
content: str = file["content"]
245+
# Normalise newlines...
246+
content: str = file["content"].replace("\r\n", "\n").replace("\r", "\n")
250247
loc: int = file["content"].count("\n") + 1
251-
annotation: str = ""
252248

253-
tokens = [t for t in utils.TOKEN_REGEX.findall(content) if utils.validate_discord_token(t)]
254-
if tokens:
255-
annotation = "Contains possibly sensitive information: Discord Token(s)"
256-
if not password:
257-
annotation += ", which have now been invalidated."
249+
positions: list[int] = []
250+
extra: str = ""
251+
252+
secrets: list[ScannerSecret] = SecurityInfo.scan_file(content)
253+
for payload in secrets:
254+
service: Services = payload["service"]
255+
256+
extra += f"{service.value}, "
257+
positions += [t[0] for t in payload["tokens"]]
258+
259+
if not password and self._handling_tokens and service is Services.discord:
260+
self._handle_discord_tokens(tokens=[t[1] for t in payload["tokens"]], paste_id=paste.id)
261+
262+
extra = extra.removesuffix(", ")
263+
annotation = f"Contains possibly sensitive data from: {extra}" if extra else ""
258264

259265
row: asyncpg.Record | None = await connection.fetchrow(
260-
file_query, paste.id, content, name, loc, annotation
266+
file_query,
267+
paste.id,
268+
content,
269+
name,
270+
loc,
271+
annotation,
272+
sorted(positions),
261273
)
262274

263275
if row:
264276
paste.files.append(FileModel(row))
265277

266-
if not password:
267-
# if the user didn't provide a password (a public paste)
268-
# we check for discord tokens
269-
self._handle_discord_tokens(*data["files"], paste_id=paste.id)
270-
271278
return paste
272279

273280
async def fetch_paste_security(self, *, token: str) -> PasteModel | None:

core/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def __init__(self, record: asyncpg.Record | dict[str, Any]) -> None:
6767
self.charcount: int = record["charcount"]
6868
self.index: int = record["file_index"]
6969
self.annotation: str = record["annotation"]
70+
self.warning_positions: list[int] = record["warning_positions"]
7071

7172

7273
class PasteModel(BaseModel):

core/scanners.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""MystBin. Share code easily.
2+
3+
Copyright (C) 2020-Current PythonistaGuild
4+
5+
This program is free software: you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation, either version 3 of the License, or
8+
(at your option) any later version.
9+
10+
This program is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with this program. If not, see <https://www.gnu.org/licenses/>.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import base64
22+
import binascii
23+
import enum
24+
import logging
25+
import re
26+
from typing import TYPE_CHECKING, ClassVar
27+
28+
29+
if TYPE_CHECKING:
30+
from types_.scanner import ScannerSecret
31+
32+
33+
logger: logging.Logger = logging.getLogger(__name__)
34+
35+
36+
class Services(enum.Enum):
    """Services whose secrets the scanners in this module look for.

    Each member's value is the display name of the service.
    """

    discord = "Discord"
    pypi = "PyPi"
    github = "GitHub"
40+
41+
42+
class BaseScanner:
    """Regex-driven scanner base; subclasses provide ``REGEX`` and ``SERVICE``."""

    REGEX: ClassVar[re.Pattern[str]]
    SERVICE: ClassVar[Services]

    @classmethod
    def match(cls, content: str) -> ScannerSecret:
        """Collect every ``REGEX`` hit found in ``content``.

        Returns a mapping holding this scanner's ``SERVICE`` under ``"service"``
        and, under ``"tokens"``, one ``(start_offset, matched_text)`` tuple per
        hit, in the order the hits occur in ``content``.
        """
        hits: list[tuple[int, str]] = []
        for found in cls.REGEX.finditer(content):
            hits.append((found.start(0), found.group(0)))

        return {"service": cls.SERVICE, "tokens": hits}
56+
57+
58+
class DiscordScanner(BaseScanner):
    """Scanner for Discord tokens.

    Regex hits are only reported when their first dot-separated segment
    decodes to an integer (see ``validate_discord_token``).
    """

    REGEX = re.compile(r"[a-zA-Z0-9_-]{23,28}\.[a-zA-Z0-9_-]{6,7}\.[a-zA-Z0-9_-]{27,}")
    SERVICE = Services.discord

    @staticmethod
    def validate_discord_token(token: str) -> bool:
        """Return whether ``token`` looks like a genuine Discord token.

        The token must consist of exactly three dot-separated segments and the
        first one must base64-decode to something ``int()`` accepts (the user ID).
        Returns ``False`` instead of raising when either check fails.
        """
        try:
            # Just check if the first part validates as a user ID
            user_id, _, _ = token.split(".")

            # The segment is stored without its "=" padding. Restore exactly the
            # amount its length calls for: a fixed "==" is only right when
            # len % 4 == 2, and strict decoding (validate=True) may reject the
            # excess padding otherwise, discarding real tokens.
            padding = "=" * (-len(user_id) % 4)
            int(base64.b64decode(user_id + padding, validate=True))
        except (ValueError, binascii.Error):
            return False

        return True

    @classmethod
    def match(cls, content: str) -> ScannerSecret:
        """Return the regex hits in ``content`` that pass ``validate_discord_token``.

        Same payload shape as ``BaseScanner.match``: ``"service"`` plus a
        ``"tokens"`` list of ``(start_offset, matched_text)`` tuples.
        """
        # Reuse the base regex sweep, then drop hits that fail validation.
        payload: ScannerSecret = super().match(content)
        payload["tokens"] = [hit for hit in payload["tokens"] if cls.validate_discord_token(hit[1])]

        return payload
85+
86+
87+
class PyPiScanner(BaseScanner):
    """Scanner for PyPi tokens; uses the inherited ``match`` without extra validation."""

    # Literal "pypi-AgEIcHlwaS5vcmc" prefix followed by 70+ characters from [A-Za-z0-9-_].
    REGEX = re.compile(r"pypi-AgEIcHlwaS5vcmc[A-Za-z0-9-_]{70,}")
    SERVICE = Services.pypi
90+
91+
92+
class GitHubScanner(BaseScanner):
    """Scanner for GitHub tokens; uses the inherited ``match`` without extra validation."""

    # One of the ghp/gho/ghu/ghs/ghr prefixes, an underscore, then 36 characters from [A-Za-z0-9_].
    REGEX = re.compile(r"((ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36})")
    SERVICE = Services.github
95+
96+
97+
class SecurityInfo:
    """Runs the per-service scanners over a file's content."""

    # Which scanner class handles which service.
    __SERVICE_MAPPING: ClassVar[dict[Services, type[BaseScanner]]] = {
        Services.discord: DiscordScanner,
        Services.pypi: PyPiScanner,
        Services.github: GitHubScanner,
    }

    @classmethod
    def scan_file(
        cls,
        file: str,
        /,
        *,
        allowed: list[Services] | None = None,
        disallowed: list[Services] | None = None,
    ) -> list[ScannerSecret]:
        """Scan for tokens in a given file's content.

        You may pass a list of allowed or disallowed Services.
        If both lists are empty (Default) all available services will be scanned.

        Parameters
        ----------
        file: str
            The text content to scan.
        allowed: list[Services] | None
            Services to scan for. ``None`` or an empty list means every member of ``Services``.
        disallowed: list[Services] | None
            Services to leave out, even if they are listed in ``allowed``.

        Returns
        -------
        list[ScannerSecret]
            One payload per service that produced at least one token, in the order
            the services were scanned.
        """
        excluded: list[Services] = disallowed or []
        candidates: list[Services] = allowed or list(Services)

        secrets: list[ScannerSecret] = []

        for service in candidates:
            if service in excluded:
                continue

            scanner: type[BaseScanner] | None = cls.__SERVICE_MAPPING.get(service)
            if scanner is None:
                # Log through this module's logger, not the root logger, so the
                # message carries the module name and honours its configuration.
                logger.warning("The provided service %r is not a supported or a valid service.", service)
                continue

            found: ScannerSecret = scanner.match(file)
            if found["tokens"]:
                secrets.append(found)

        return secrets

migration.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ALTER TABLE files ALTER COLUMN filename SET NOT NULL; -- always require filenam
1515
ALTER TABLE files DROP COLUMN IF EXISTS attachment; -- we don't have these anymore
1616
ALTER TABLE files ADD COLUMN IF NOT EXISTS annotation TEXT;
1717
ALTER TABLE files RENAME COLUMN index TO file_index; -- bad column name
18+
ALTER TABLE files ADD COLUMN IF NOT EXISTS warning_positions INTEGER[]; -- New line warning positions
1819

1920
SAVEPOINT drops;
2021
DROP TABLE IF EXISTS bans CASCADE; -- no longer needed

schema.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,6 @@ CREATE TABLE IF NOT EXISTS files (
2020
charcount INTEGER GENERATED ALWAYS AS (LENGTH(content)) STORED,
2121
file_index SERIAL NOT NULL,
2222
annotation TEXT,
23+
warning_positions INTEGER[],
2324
PRIMARY KEY (parent_id, file_index)
2425
);

types_/scanner.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""MystBin. Share code easily.
2+
3+
Copyright (C) 2020-Current PythonistaGuild
4+
5+
This program is free software: you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation, either version 3 of the License, or
8+
(at your option) any later version.
9+
10+
This program is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with this program. If not, see <https://www.gnu.org/licenses/>.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
from typing import TYPE_CHECKING, TypedDict
22+
23+
24+
if TYPE_CHECKING:
25+
from core.scanners import Services
26+
27+
28+
class ScannerSecret(TypedDict):
    """Payload describing the secrets one scanner found for a single service."""

    # The service the reported tokens belong to.
    service: Services
    # One (start offset, matched text) pair per token found.
    tokens: list[tuple[int, str]]

views/htmx.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,46 @@ def highlight_code(self, *, files: list[dict[str, Any]]) -> str:
5555

5656
raw_url: str = f'/raw/{file["parent_id"]}'
5757
annotation: str = file["annotation"]
58+
positions: list[int] = file.get("warning_positions", [])
59+
original: str = file["content"]
5860

59-
content = bleach.clean(
60-
file["content"].replace("<!", "&lt;&#33;"), attributes=[], tags=[], strip_comments=False
61+
parts: list[str] = annotation.split(":")
62+
annotation = parts.pop(0)
63+
64+
extra: str = (
65+
f"""<span class="annotationSecond" data-text="Discord tokens will be invalidated automatically">{parts[0]}"""
66+
if parts
67+
else ""
68+
)
69+
annotations: str = (
70+
f'<small class="annotations">❌ {annotation}{": " + extra if extra else ""}</small>'
71+
if annotation
72+
else ""
6173
)
62-
annotations: str = f'<small class="annotations">❌ {annotation}</small>' if annotation else ""
6374

75+
position: int = 0
76+
next_pos: int | None = positions.pop(0) if positions else None
77+
78+
numbers: list[str] = []
79+
for n, line in enumerate(original.splitlines(), 1):
80+
length: int = len(line)
81+
82+
if next_pos is not None and position <= next_pos <= position + length:
83+
numbers.append(f"""<tr><td class="lineNumRow">{n}</td><td class="lineWarn"></td></tr>""")
84+
85+
try:
86+
next_pos = positions.pop(0)
87+
except IndexError:
88+
next_pos = None
89+
90+
else:
91+
numbers.append(f"""<tr><td class="lineNumRow">{n}</td></tr>""")
92+
93+
position += length + 1
94+
95+
content = bleach.clean(original.replace("<!", "&lt;&#33;"), attributes=[], tags=[], strip_comments=False)
96+
97+
lines: str = f"""<table class="lineNums"><tbody>\n{"".join(numbers)}\n</tbody></table>"""
6498
html += f"""
6599
<div id="__paste_a_{index}" class="pasteArea">
66100
<div class="pasteHeader">
@@ -72,7 +106,7 @@ def highlight_code(self, *, files: list[dict[str, Any]]) -> str:
72106
</div>
73107
</div>
74108
{annotations}
75-
<pre id="__paste_c_{index}" class="fileContent" style="display: flex; flex-grow: 1;"><code>{content}</code></pre>
109+
<pre id="__paste_c_{index}" class="fileContent" style="display: flex; flex-grow: 1;">{lines}<code>{content}</code></pre>
76110
</div>"""
77111

78112
return html

web/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<!-- STYLESHEETS -->
2424
<!-- <link rel="preload" href="static/styles/global.css" as="style" /> -->
2525
<!-- <link rel="preload" href="static/styles/highlights.css" as="style" /> -->
26-
<link rel="stylesheet" type="text/css" href="/static/styles/global.css?v=5" />
26+
<link rel="stylesheet" type="text/css" href="/static/styles/global.css?v=6" />
2727

2828
<!-- FONTS -->
2929
<link rel="preconnect" href="https://fonts.googleapis.com">

web/maint.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
<!-- STYLESHEETS -->
1717
<!-- <link rel="preload" href="static/styles/global.css" as="style" /> -->
18-
<link rel="stylesheet" type="text/css" href="static/styles/global.css?v=5" />
18+
<link rel="stylesheet" type="text/css" href="static/styles/global.css?v=6" />
1919

2020

2121
<!-- FONTS -->

0 commit comments

Comments
 (0)