Skip to content

Commit ba9126a

Browse files
author
Pietro Albini
committed
Add support for Message.parsed_text
This new attribute contains a list of all the parsed entities contained in the message (if they're provided by Telegram). This makes it possible, for example, to get all the usernames contained in the message, or to render the message including bold, italics and so on. Fixes: GH-59
1 parent 092be24 commit ba9126a

File tree

4 files changed

+492
-0
lines changed

4 files changed

+492
-0
lines changed

botogram/objects/messages.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,211 @@
1616
Location
1717

1818

19+
def _require_message(func):
    """Decorator which forces the object to have an attached message.

    The wrapped callable raises ``RuntimeError`` when the instance it is
    invoked on has no ``_message`` attribute, or when that attribute is None.
    Otherwise the call is forwarded unchanged to ``func``.
    """
    @utils.wraps(func)
    def wrapper(self, *args, **kwargs):
        # A missing attribute is treated exactly like an unset one
        if getattr(self, "_message", None) is None:
            raise RuntimeError("A message must be attached to this object")
        return func(self, *args, **kwargs)

    return wrapper
27+
28+
29+
class ParsedTextEntity(BaseObject):
    """Telegram API representation of an entity in a text message

    This was originally called MessageEntity by Telegram
    https://core.telegram.org/bots/api#messageentity

    The entity itself only stores its type and position; the ``text`` and
    ``url`` properties need an attached message to resolve the content.
    """

    required = {
        "type": str,
        "offset": int,
        "length": int,
    }
    optional = {
        "url": str,
    }
    replace_keys = {
        # Exposed through the dynamically computed ``url`` property
        "url": "_url",

        # Kept private: callers are expected to use ``text`` instead
        "offset": "_offset",
        "length": "_length",
    }

    def __init__(self, data, api=None, message=None):
        """Build the entity, optionally binding it to ``message``"""
        super().__init__(data, api)
        self._message = message

    def __str__(self):
        return self.text

    def __repr__(self):
        # Without a message the content can't be resolved, so fall back to
        # showing the range covered by the entity
        if self._message is None:
            begin = self._offset
            end = begin + self._length
            return '<ParsedTextEntity %s from %s to %s>' % (
                self.type, begin, end,
            )

        return '<ParsedTextEntity %s: "%s">' % (self.type, self.text)

    def __len__(self):
        return self._length

    def set_message(self, message):
        """Set the message instance related to this object"""
        self._message = message

    @property
    @_require_message
    def text(self):
        """Get the portion of the message text covered by this entity

        Raises ``ValueError`` if the attached message has no text, or if the
        entity ends past the end of that text.
        """
        # NOTE(review): Telegram documents offset/length in UTF-16 code
        # units, while the slice below counts code points -- confirm the
        # behaviour for text containing characters outside the BMP.
        content = self._message.text
        if content is None:
            raise ValueError("The message must have a text")

        begin = self._offset
        end = begin + self._length
        if end > len(content):
            raise ValueError("The message is too short!")

        return content[begin:end]

    @property
    @_require_message
    def url(self):
        """Get the URL attached to the entity, or None if there isn't one"""
        # An explicitly provided URL always wins
        if self._url is not None:
            return self._url

        kind = self.type
        if kind == "url":
            # Standard URLs: the text is the link itself
            return self.text
        if kind == "mention":
            # telegram.me URLs: drop the first character of the mention
            return "https://telegram.me/%s" % self.text[1:]
        if kind == "email":
            # mailto: URL
            return "mailto:%s" % self.text

        # No URL can be derived from the other types
        return None
113+
114+
class ParsedText:
    """Collection of ParsedTextEntity.

    This is a list-like object, and mimics the List<MessageEntity> Telegram
    object, but increases its functionalities.

    The entities received in ``data`` are kept as-is; the list exposed by
    iteration, indexing, ``in`` and ``filter`` is computed lazily and also
    contains extra ``plain`` entities covering the text between them.
    """

    def __init__(self, data, api=None, message=None):
        """Create the collection from a list of raw entities

        Raises ``ValueError`` if ``data`` is not a list, or if ``message`` is
        provided but has no text.
        """
        self._api = api
        # Accept only list of entities
        if not isinstance(data, list):
            raise ValueError("You must provide a list of ParsedTextEntity")

        # Create ParsedTextEntity instances from the data
        self._original_entities = [
            ParsedTextEntity(entity, api, message) for entity in data
        ]

        # Original entities are separated from the exposed entities because
        # plaintext entities are calculated and added to the exposed entities
        self._entities = None

        self.set_message(message)

    def __repr__(self):
        return '<ParsedText %s>' % repr(self._calculate_entities())

    def set_api(self, api):
        """Change the API instance"""
        self._api = api

    def set_message(self, message):
        """Change the message instance

        Raises ``ValueError`` if the new message has no text.
        """
        if message is not None and message.text is None:
            raise ValueError("The message must have some text")

        self._message = message
        for entity in self._original_entities:
            entity.set_message(message)

        # Refresh the calculated entities list
        self._entities = None

    def serialize(self):
        """Serialize this object"""
        # Only the original entities are serialized, not the calculated
        # plaintext ones
        return [entity.serialize() for entity in self._original_entities]

    def _plain_entity(self, offset, length):
        """Create a ``plain`` entity covering the provided range"""
        return ParsedTextEntity({
            "type": "plain",
            "offset": offset,
            "length": length,
        }, self._api, self._message)

    @_require_message
    def _calculate_entities(self):
        """Calculate the correct list of entities"""
        # Return the cached result if possible; the cached result is nullified
        # when a new instance of Message is attached
        if self._entities is not None:
            return self._entities

        # The list is built locally and cached only once it's complete, so a
        # failure in the middle doesn't leave a partial result in the cache
        entities = []
        offset = 0
        for entity in self._original_entities:
            # If there was some text before the current entity, add an extra
            # plaintext entity
            if offset < entity._offset:
                entities.append(
                    self._plain_entity(offset, entity._offset - offset)
                )

            entities.append(entity)
            offset = entity._offset + entity._length

        # Then add the last few bits as plaintext if they're present
        remaining = len(self._message.text) - offset
        if remaining > 0:
            entities.append(self._plain_entity(offset, remaining))

        self._entities = entities
        return entities

    def filter(self, *types, exclude=False):
        """Get only some types of entities

        With ``exclude=False`` only the entities whose type is in ``types``
        are returned; with ``exclude=True`` only the other ones are.
        """
        # Keep the entity if its type is in the allowed ones and exclude is
        # False OR if its type isn't in the allowed ones and exclude is True
        return [
            entity for entity in self._calculate_entities()
            if (entity.type in types) ^ exclude
        ]

    # Provide a basic list-like interface; you can always mutate this object to
    # a list with list(self) if you need more advanced methods

    def __iter__(self):
        return iter(self._calculate_entities())

    def __getitem__(self, index):
        return self._calculate_entities()[index]

    def __contains__(self, key):
        # This checks if a given type is in the entities list. The list must
        # be obtained through _calculate_entities(), because the cache is None
        # until the first calculation and after every set_message() call
        return key in (entity.type for entity in self._calculate_entities())
222+
223+
19224
class Message(BaseObject, mixins.MessageMixin):
20225
"""Telegram API representation of a message
21226
@@ -35,6 +240,7 @@ def from_(self):
35240
"chat": Chat,
36241
}
37242
optional = {
243+
"entities": ParsedText,
38244
"forward_from": User,
39245
"forward_date": int,
40246
"reply_to_message": _itself,
@@ -62,8 +268,22 @@ def from_(self):
62268
}
63269
replace_keys = {
64270
"from": "sender",
271+
"entities": "parsed_text",
65272
}
66273

274+
def __init__(self, data, api=None):
    """Build the message and bind its parsed text to this instance"""
    super().__init__(data, api)

    # A message with some text always gets a parsed_text, even when no
    # entities were provided for it
    has_text = self.text is not None
    if has_text and self.parsed_text is None:
        self.parsed_text = ParsedText([], api, self)

    # The parsed text needs this Message instance in order to calculate the
    # content of each entity
    parsed = self.parsed_text
    if parsed is not None:
        parsed.set_message(self)
286+
67287
@property
68288
@utils.deprecated("Message.new_chat_participant", "1.0",
69289
"Rename property to Message.new_chat_member")

docs/api/telegram.rst

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,101 @@ about its business.
595595

596596
Now the method returns the sent message
597597

598+
.. py:class:: botogram.ParsedText
599+
600+
This class contains the parsed representation of the text of a received
601+
message. This allows you to work with the rich-formatted text the user sent,
602+
in addition to the plaintext provided by the :py:class:`~botogram.Message`
603+
class.
604+
605+
This class behaves as a list of :py:class:`~botogram.ParsedTextEntity`, so
606+
you can access its items as you would do with any other list (indexed
607+
access, iteration...), but it also provides some other utility tools.
608+
609+
.. versionadded:: 0.3
610+
611+
.. describe:: type in parsed
612+
613+
Check if a given entity type is contained in the message. For example,
614+
with the following code you can check if the user sent links in their
615+
message:
616+
617+
.. code-block:: python
618+
619+
if "url" in message.parsed_text:
620+
chat.send("Hey, you sent me a link!")
621+
622+
.. py:method:: filter(\*types, [exclude=False])
623+
624+
This method returns a list of all the
625+
:py:class:`~botogram.ParsedTextEntity` in a message of a given
626+
type. This allows you to get only some types of entities, and exclude the
627+
other ones in a simple way. You can also just **exclude** from the result
628+
the types you provide.
629+
630+
.. code-block:: python
631+
632+
# Get only the URLs
633+
urls = message.parsed_text.filter("url")
634+
635+
# Get usernames and hashtags
636+
usernames_hashtags = message.parsed_text.filter("mention", "hashtag")
637+
638+
# Exclude plaintext
639+
entities = message.parsed_text.filter("plain", exclude=True)
640+
641+
.. py:class:: botogram.ParsedTextEntity
642+
643+
This class represents a single entity contained in a text message.
644+
645+
.. versionadded:: 0.3
646+
647+
.. describe:: str(entity)
648+
649+
A handy alias for the :py:attr:`~botogram.ParsedTextEntity.text`
650+
attribute.
651+
652+
.. describe:: len(entity)
653+
654+
Return the length of the entity.
655+
656+
.. py:attribute:: type
657+
658+
The type of the entity. This can be one of the following:
659+
660+
* **plain**: a plain string (with no formatting or special meaning)
661+
662+
* **mention**: a mention to another user (for example ``@pietroalbini``)
663+
664+
* **hashtag**: a hashtag (for example ``#pythonftw``)
665+
666+
* **bot_command**: a command sent to a bot (for example ``/help``)
667+
668+
* **url**: a textual URL (for example ``https://pietroalbini.io``)
669+
670+
* **email**: an email address (for example ``[email protected]``)
671+
672+
* **bold**: a bold-formatted text
673+
674+
* **italic**: an italic-formatted text
675+
676+
* **code**: a monospace-formatted text
677+
678+
* **pre**: a monospace-formatted block
679+
680+
* **text_link**: a link with a label
681+
682+
.. py:attribute:: text
683+
684+
Return the plaintext content of the entity. Together with the type, you can
685+
recreate the original formatting of the entity.
686+
687+
.. py:attribute:: url
688+
689+
The attached URL for the entity. This includes the raw URL for the
690+
**url** and **text_link** types, the ``telegram.me`` link for the
691+
**mention** type, and the ``mailto:`` link for **email** type.
692+
598693
.. py:class:: botogram.Message
599694
600695
This class represents messages received by and sent from your bot. Messages
@@ -658,6 +753,15 @@ about its business.
658753

659754
*This attribute can be None if it's not provided by Telegram.*
660755

756+
.. py:attribute:: parsed_text
757+
758+
The :py:class:`~botogram.ParsedText` representation of the text of this
759+
message.
760+
761+
*This attribute is None if the text attribute is also None.*
762+
763+
.. versionadded:: 0.3
764+
661765
.. py:attribute:: audio
662766
663767
An :py:class:`~botogram.Audio` object, for when this message is an audio

docs/changelog.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ New features
3434
* New method :py:meth:`botogram.Bot.edit_message`
3535
* New method :py:meth:`botogram.Bot.edit_caption`
3636

37+
* Added support for working with parsed text in a message:
38+
39+
* New attribute :py:attr:`botogram.Message.parsed_text`
40+
* New class :py:class:`botogram.ParsedText`
41+
* New class :py:class:`botogram.ParsedTextEntity`
42+
3743
* Added new attribute :py:attr:`botogram.Message.pinned_message`
3844
* Every method which sends something to a chat now returns the sent
3945
:py:class:`~botogram.Message`

0 commit comments

Comments
 (0)