Skip to content

Commit 1a470a8

Browse files
author
Pietro Albini
committed
Merge branch 'bugfixes-0.1.2' into prepare-release
Conflicts: docs/conf.py setup.py
2 parents 720335e + 7dc390e commit 1a470a8

File tree

7 files changed

+59
-5
lines changed

7 files changed

+59
-5
lines changed

botogram/syntaxes.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import re
1010

11+
from . import utils
12+
1113

1214
_markdown_re = re.compile(r".*("
1315
r"\*(.*)\*|"
@@ -30,11 +32,16 @@
3032

3133
def is_markdown(message):
3234
"""Check if a string is actually markdown"""
35+
# Don't mark part of URLs or email addresses as Markdown
36+
message = utils.strip_urls(message)
37+
3338
return bool(_markdown_re.match(message))
3439

3540

3641
def is_html(message):
3742
"""Check if a string is actually HTML"""
43+
# Here URLs are not stripped because no sane URL contains HTML tags in it,
44+
# and for a few cases the speed penalty is not worth it
3845
return bool(_html_re.match(message))
3946

4047

@@ -48,7 +55,9 @@ def guess_syntax(message, provided):
4855
else:
4956
return None
5057

51-
if provided in ("md", "markdown", "Markdown"):
58+
if provided in ("plain",):
59+
return None
60+
elif provided in ("md", "markdown", "Markdown"):
5261
return "Markdown"
5362
elif provided in ("html", "HTML"):
5463
return "HTML"

botogram/utils.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717
import logbook
1818
import functools
1919

20+
# URLs regex created by http://twitter.com/imme_emosol
21+
2022
_username_re = re.compile(r"\@([a-zA-Z0-9_]{5}[a-zA-Z0-9_]*)")
2123
_command_re = re.compile(r"^\/[a-zA-Z0-9_]+(\@[a-zA-Z0-9_]{5}[a-zA-Z0-9_]*)?$")
2224
_email_re = re.compile(r"[a-zA-Z0-9_\.\+\-]+\@[a-zA-Z0-9_\.\-]+\.[a-zA-Z]+")
25+
_url_re = re.compile(r"https?://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?")
2326

2427
# This small piece of global state will track if logbook was configured
2528
_logger_configured = False
@@ -139,15 +142,22 @@ def docstring_of(func, bot=None, component_id=None):
139142
return format_docstr(docstring)
140143

141144

145+
def strip_urls(string):
146+
"""Strip URLs and emails from a string"""
147+
string = _url_re.sub("", string)
148+
string = _email_re.sub("", string)
149+
return string
150+
151+
142152
def usernames_in(message):
143153
"""Return all the matched usernames in the message"""
144154
# Don't parse usernames in the commands
145155
if _command_re.match(message.split(" ", 1)[0]):
146156
message = message.split(" ", 1)[1]
147157

148158
# Strip email addresses from the message, in order to avoid matching the
149-
# user's domain. This also happens to match username/passwords in URLs
150-
message = _email_re.sub("", message)
159+
# user's domain. Also strip URLs, in order to avoid usernames in them.
160+
message = strip_urls(message)
151161

152162
results = []
153163
for result in _username_re.finditer(message):

docs/buildthedocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ versions:
1313
source:
1414
provider: git
1515
url: .
16-
checkout: release-0.1
16+
checkout: v0.1.1
1717
directory: docs
1818
title: botogram 0.1
1919
notice: alpha

docs/changelog.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,28 @@ botogram changelog
99

1010
Here you can see what changed in every botogram release.
1111

12+
.. _changelog-0.2:
13+
14+
botogram 0.2
15+
=============
16+
17+
*Alpha release, not yet released*
18+
19+
No changes yet.
20+
21+
.. _changelog-0.1.2:
22+
23+
botogram 0.1.2
24+
==============
25+
26+
*Bugfix release, not yet released*
27+
28+
* Add a way to disable the syntax detector (`issue 27`_)
29+
* Fix automatic syntax detector recognizing markdown in URLs (`issue 28`_)
30+
31+
.. _issue 27: https://github.com/pietroalbini/botogram/issues/27
32+
.. _issue 28: https://github.com/pietroalbini/botogram/issues/28
33+
1234
.. _changelog-0.1.1:
1335

1436
botogram 0.1.1

docs/tricks.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@ That parameter accepts the following values:
7171
* ``markdown``, or its aliases ``md`` and ``Markdown``
7272
* ``html``, or its alias ``HTML``
7373

74+
Also, if you don't want to use any rich formatting but the detector spots
75+
something, you can disable it by providing the special syntax ``plain``:
76+
77+
.. code-block:: python
78+
79+
chat.send("*I don't want this to be detected*", syntax="plain")
80+
7481
.. note::
7582

7683
Support for rich formatting depends on your users' Telegram client. If

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def hello_command(chat, message, args):
2626
if __name__ == "__main__":
2727
bot.run()
2828
29-
Want to get started? `Go to the documentation_`
29+
Want to get started? `Go to the documentation`_
3030
3131
.. _Telegram bots: https://core.telegram.org/bots
3232
.. _Go to the documentation: https://botogram.pietroalbini.io/docs

tests/test_syntaxes.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def test_is_markdown():
2121
assert botogram.syntaxes.is_markdown("[a](b)")
2222
assert botogram.syntaxes.is_markdown("![a](b)!")
2323

24+
assert not botogram.syntaxes.is_markdown("hey@this_is_awesome.com")
25+
assert not botogram.syntaxes.is_markdown("https://www.this_is_awesome.com")
26+
2427

2528
def test_is_html():
2629
assert not botogram.syntaxes.is_html("not HTML, sorry!")
@@ -38,6 +41,9 @@ def test_is_html():
3841

3942
def test_guess_syntax():
4043
# Provided syntax name
44+
for name in ("plain",):
45+
assert botogram.syntaxes.guess_syntax("", name) is None
46+
4147
for name in ("md", "markdown", "Markdown"):
4248
assert botogram.syntaxes.guess_syntax("", name) == "Markdown"
4349

0 commit comments

Comments
 (0)