2121import asyncio
2222import datetime
2323import logging
24- import re
2524from typing import TYPE_CHECKING , Any , Self
2625
2726import aiohttp
3130
3231from . import utils
3332from .models import FileModel , PasteModel
33+ from .scanners import SecurityInfo , Services
3434
3535
3636if TYPE_CHECKING :
3737 _Pool = asyncpg .Pool [asyncpg .Record ]
3838 from types_ .config import Github
3939 from types_ .github import PostGist
40+ from types_ .scanner import ScannerSecret
4041else :
4142 _Pool = asyncpg .Pool
4243
43- DISCORD_TOKEN_REGEX : re . Pattern [ str ] = re . compile ( r"[a-zA-Z0-9_-]{23,28}\.[a-zA-Z0-9_-]{6,7}\.[a-zA-Z0-9_-]{27,}" )
44+
4445LOGGER : logging .Logger = logging .getLogger (__name__ )
4546
4647
@@ -53,7 +54,7 @@ def __init__(self, *, dsn: str, session: aiohttp.ClientSession | None = None, gi
5354 self ._handling_tokens = bool (self .session and github_config )
5455
5556 if self ._handling_tokens :
56- LOGGER .info ("Will handle compromised discord info ." )
57+ LOGGER .info ("Setup to handle Discord Tokens ." )
5758 assert github_config # guarded by if here
5859
5960 self ._gist_token = github_config ["token" ]
@@ -83,20 +84,15 @@ async def _token_task(self) -> None:
8384
8485 await asyncio .sleep (self ._gist_timeout )
8586
def _handle_discord_tokens(self, tokens: list[str], paste_id: str) -> None:
    """Record Discord tokens found in a paste in the pending token bucket.

    Does nothing when token handling is disabled (``self._handling_tokens``
    is false) or when ``tokens`` is empty.

    Parameters
    ----------
    tokens: list[str]
        The token strings found in the paste. They are stored as a single
        newline-separated string.
    paste_id: str
        The ID of the paste the tokens came from; used as the bucket key, so
        a later call with the same ID replaces the earlier entry.
    """
    if not self._handling_tokens or not tokens:
        return

    # Insert before logging so the reported bucket size includes this paste.
    self.__tokens_bucket[paste_id] = "\n".join(tokens)

    LOGGER.info(
        "Discord bot token located and added to token bucket. Current bucket size is: %s",
        len(self.__tokens_bucket),
    )
10096
10197 async def _post_gist_of_tokens (self ) -> None :
10298 assert self .session # guarded in caller
@@ -211,8 +207,8 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
211207 """
212208
213209 file_query : str = """
214- INSERT INTO files (parent_id, content, filename, loc, annotation)
215- VALUES ($1, $2, $3, $4, $5)
210+ INSERT INTO files (parent_id, content, filename, loc, annotation, warning_positions )
211+ VALUES ($1, $2, $3, $4, $5, $6 )
216212 RETURNING *
217213 """
218214
@@ -246,28 +242,39 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
246242 name : str = (file .get ("filename" ) or f"file_{ index } " )[- CONFIG ["PASTES" ]["name_limit" ] :]
247243 name = "_" .join (name .splitlines ())
248244
249- content : str = file ["content" ]
245+ # Normalise newlines...
246+ content : str = file ["content" ].replace ("\r \n " , "\n " ).replace ("\r " , "\n " )
250247 loc : int = file ["content" ].count ("\n " ) + 1
251- annotation : str = ""
252248
253- tokens = [t for t in utils .TOKEN_REGEX .findall (content ) if utils .validate_discord_token (t )]
254- if tokens :
255- annotation = "Contains possibly sensitive information: Discord Token(s)"
256- if not password :
257- annotation += ", which have now been invalidated."
249+ positions : list [int ] = []
250+ extra : str = ""
251+
252+ secrets : list [ScannerSecret ] = SecurityInfo .scan_file (content )
253+ for payload in secrets :
254+ service : Services = payload ["service" ]
255+
256+ extra += f"{ service .value } , "
257+ positions += [t [0 ] for t in payload ["tokens" ]]
258+
259+ if not password and self ._handling_tokens and service is Services .discord :
260+ self ._handle_discord_tokens (tokens = [t [1 ] for t in payload ["tokens" ]], paste_id = paste .id )
261+
262+ extra = extra .removesuffix (", " )
263+ annotation = f"Contains possibly sensitive data from: { extra } " if extra else ""
258264
259265 row : asyncpg .Record | None = await connection .fetchrow (
260- file_query , paste .id , content , name , loc , annotation
266+ file_query ,
267+ paste .id ,
268+ content ,
269+ name ,
270+ loc ,
271+ annotation ,
272+ sorted (positions ),
261273 )
262274
263275 if row :
264276 paste .files .append (FileModel (row ))
265277
266- if not password :
267- # if the user didn't provide a password (a public paste)
268- # we check for discord tokens
269- self ._handle_discord_tokens (* data ["files" ], paste_id = paste .id )
270-
271278 return paste
272279
273280 async def fetch_paste_security (self , * , token : str ) -> PasteModel | None :
0 commit comments