|
17 | 17 | import logbook |
18 | 18 | import functools |
19 | 19 |
|
# URL regex adapted from the one created by http://twitter.com/imme_emosol.
# Unlike the original pattern, hyphens are accepted inside the host and a
# query string or fragment may directly follow the host, so that the whole
# URL is matched instead of only its leading part.

# An "@" followed by at least five characters out of letters, digits and "_"
_username_re = re.compile(r"\@([a-zA-Z0-9_]{5}[a-zA-Z0-9_]*)")
# A whole string made of "/command", optionally followed by "@username"
_command_re = re.compile(r"^\/[a-zA-Z0-9_]+(\@[a-zA-Z0-9_]{5}[a-zA-Z0-9_]*)?$")
# local-part "@" domain "." letters
_email_re = re.compile(r"[a-zA-Z0-9_\.\+\-]+\@[a-zA-Z0-9_\.\-]+\.[a-zA-Z]+")
# http(s) scheme, dot-separated host labels, then an optional tail starting
# with "/", "?" or "#" and running up to the next whitespace
_url_re = re.compile(r"https?://(-\.)?([^\s/?\.#]+\.?)+([/?#][^\s]*)?")
23 | 26 |
|
# This small piece of module-level state tracks whether logbook has already
# been configured; it starts out as False.
_logger_configured = False
@@ -139,15 +142,22 @@ def docstring_of(func, bot=None, component_id=None): |
139 | 142 | return format_docstr(docstring) |
140 | 143 |
|
141 | 144 |
|
def strip_urls(string):
    """Return ``string`` with all URLs and email addresses removed.

    Every match of the module's URL pattern is deleted first; email
    addresses are then deleted from what is left.
    """
    without_urls = _url_re.sub("", string)
    return _email_re.sub("", without_urls)
| 150 | + |
| 151 | + |
142 | 152 | def usernames_in(message): |
143 | 153 | """Return all the matched usernames in the message""" |
144 | 154 | # Don't parse usernames in the commands |
145 | 155 | if _command_re.match(message.split(" ", 1)[0]): |
146 | 156 | message = message.split(" ", 1)[1] |
147 | 157 |
|
148 | 158 | # Strip email addresses from the message, in order to avoid matching the |
149 | | - # user's domain. This also happens to match username/passwords in URLs |
150 | | - message = _email_re.sub("", message) |
| 159 | + # user's domain. Also strip URLs, in order to avoid usernames in them. |
| 160 | + message = strip_urls(message) |
151 | 161 |
|
152 | 162 | results = [] |
153 | 163 | for result in _username_re.finditer(message): |
|
0 commit comments