Skip to content

Commit 3e08674

Browse files
author
Pietro Albini
committed
Merge branch 'feature/parsed-text'
2 parents 0fb8f1f + ff7273b commit 3e08674

File tree

4 files changed

+602
-0
lines changed

4 files changed

+602
-0
lines changed

botogram/objects/messages.py

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,248 @@
1616
Location
1717

1818

19+
def _require_message(func):
    """Decorator which refuses to run ``func`` without an attached message

    The decorated callable raises ``RuntimeError`` when the instance it is
    called on has no ``_message`` attribute, or when that attribute is None.
    Otherwise the call is forwarded unchanged to ``func``.
    """
    @utils.wraps(func)
    def wrapper(self, *args, **kwargs):
        # A missing attribute and an explicit None are treated the same way
        attached = getattr(self, "_message", None)
        if attached is None:
            raise RuntimeError("A message must be attached to this object")

        return func(self, *args, **kwargs)

    return wrapper
27+
28+
29+
class ParsedTextEntity(BaseObject):
    """Telegram API representation of an entity in a text message

    This was originally called MessageEntity by Telegram
    https://core.telegram.org/bots/api#messageentity

    An entity only stores its position (offset and length) in the text: the
    actual content is sliced from the attached message, so most of the
    properties require a message to be attached first (either via the
    constructor or via ``set_message``).
    """

    # These mappings are presumably consumed by BaseObject to validate the
    # incoming data and to rename its keys -- confirm in BaseObject
    required = {
        "type": str,
        "offset": int,
        "length": int,
    }
    optional = {
        "url": str,
    }
    replace_keys = {
        "url": "_url",  # Dynamically implemented
        "type": "_type",  # Dynamically implemented

        # Private attributes, use the ``text`` one
        "offset": "_offset",
        "length": "_length",
    }

    # Bring some sanity to the Bot API
    # Maps the raw type names to the ones exposed by the ``type`` property
    replace_types = {
        "bot_command": "command",
        "text_link": "link",
        "url": "link",
    }
    # Maps the exposed names back to the raw ones; "link" is ambiguous (two
    # raw types point to it), so the ``type`` setter resolves it by itself
    replace_types_inverse = {
        "command": "bot_command",
        "link": "text_link",
    }

    def __init__(self, data, api=None, message=None):
        """Create the entity from its raw data

        ``data`` and ``api`` are forwarded to the parent constructor, and
        ``message`` (which can be None) is the message this entity belongs
        to.
        """
        super().__init__(data, api)

        self._message = message

    def __str__(self):
        # Alias for the ``text`` property, so it has the same requirements
        return self.text

    def __repr__(self):
        # Without a message the text can't be calculated, so only the
        # position of the entity is shown
        if self._message is not None:
            return '<ParsedTextEntity %s: "%s">' % (self.type, self.text)
        else:
            return '<ParsedTextEntity %s from %s to %s>' % (
                self.type,
                self._offset,
                self._offset + self._length
            )

    def __len__(self):
        return self._length

    def set_message(self, message):
        """Set the message instance related to this object"""
        self._message = message

    @property
    def type(self):
        """Get the type of the entity"""
        # Bring some sanity to the Bot API
        if self._type in self.replace_types:
            return self.replace_types[self._type]
        return self._type

    @type.setter
    def type(self, value):
        """Set the type of the entity

        The value is converted back to the raw type name when a conversion
        is known, otherwise it's stored as it is.
        """
        # Special check for link, because two original types points to it
        if value == "link":
            # If the URL is not set or it's the same as the text, then it's a
            # normal URL, else it has a label
            # The check on the URL comes first because ``self.text`` needs an
            # attached message: this way an entity without URL can be marked
            # as a link even if no message is attached yet
            if self._url is None or self.text == self._url:
                self._type = "url"
            else:
                self._type = "text_link"
            return

        if value in self.replace_types_inverse:
            self._type = self.replace_types_inverse[value]
        else:
            self._type = value

    @property
    @_require_message
    def text(self):
        """Get the text of the entity

        The text is the slice of the attached message's text described by
        the offset and the length of the entity. A ``ValueError`` is raised
        if the message has no text or if the entity ends after the end of
        the text.
        """
        if self._message.text is None:
            raise ValueError("The message must have a text")

        start = self._offset
        stop = start + self._length

        # NOTE(review): Telegram documents offset and length in UTF-16 code
        # units, while here the text is sliced by Python code points --
        # confirm the behaviour with characters outside the BMP (emojis)
        if stop > len(self._message.text):
            raise ValueError("The message is too short!")

        return self._message.text[start:stop]

    @property
    @_require_message
    def url(self):
        """Get the URL attached to the entity

        The explicitly provided URL wins if present, otherwise an URL is
        built from the text for links, mentions and emails. None is returned
        for every other type of entity.
        """
        # Use the provided if available
        if self._url is not None:
            return self._url

        if self.type == "link":
            # Standard URLs
            return self.text
        elif self.type == "mention":
            # telegram.me URLs
            # The first character of the text (presumably the leading "@")
            # is dropped
            return "https://telegram.me/%s" % self.text[1:]
        elif self.type == "email":
            # mailto: URL
            return "mailto:%s" % self.text
        else:
            # Sorry!
            return None
150+
151+
class ParsedText:
    """Collection of ParsedTextEntity.

    This is a list-like object, and mimics the List<MessageEntity> Telegram
    object, but increases its functionalities.

    The entities exposed by this object are not only the ones provided when
    it's created: the parts of the text not covered by any of them are
    exposed as extra entities of type "plain".
    """

    def __init__(self, data, api=None, message=None):
        """Create the collection from a list of raw entities

        Each item of ``data`` is used to build a ParsedTextEntity. A
        ``ValueError`` is raised if ``data`` is not a list, or if the
        provided message has no text.
        """
        self._api = api
        # Accept only list of entities
        if not isinstance(data, list):
            raise ValueError("You must provide a list of ParsedTextEntity")

        # Create ParsedTextEntity instances from the data
        self._original_entities = [
            ParsedTextEntity(entity, api, message) for entity in data
        ]

        # Original entities are separated from the exposed entities because
        # plaintext entities are calculated and added to the exposed entities
        self._entities = None

        self.set_message(message)

    def __repr__(self):
        return '<ParsedText %s>' % repr(self._calculate_entities())

    def set_api(self, api):
        """Change the API instance"""
        self._api = api

    def set_message(self, message):
        """Change the message instance

        The message is also attached to every original entity. A
        ``ValueError`` is raised if the message has no text; None is
        accepted and detaches the current message.
        """
        if message is not None and message.text is None:
            raise ValueError("The message must have some text")

        self._message = message
        for entity in self._original_entities:
            entity.set_message(message)

        # Refresh the calculated entities list
        self._entities = None

    def serialize(self):
        """Serialize this object

        Only the original entities are serialized, the calculated plaintext
        ones are left out.
        """
        return [entity.serialize() for entity in self._original_entities]

    @_require_message
    def _calculate_entities(self):
        """Calculate the correct list of entities"""
        # Return the cached result if possible; the cached result is nullified
        # when a new instance of Message is attached
        if self._entities is not None:
            return self._entities

        text_length = len(self._message.text)

        # The list is built locally and cached only when it's complete, so a
        # failure in the middle can't leave a partial list in the cache
        entities = []
        offset = 0
        for entity in self._original_entities:
            # If there was some text before the current entity, add an extra
            # plaintext entity
            if offset < entity._offset:
                entities.append(ParsedTextEntity({
                    "type": "plain",
                    "offset": offset,
                    "length": entity._offset - offset,
                }, self._api, self._message))

            entities.append(entity)
            offset = entity._offset + entity._length

        # Then add the last few bits as plaintext if they're present
        if offset < text_length:
            entities.append(ParsedTextEntity({
                "type": "plain",
                "offset": offset,
                "length": text_length - offset,
            }, self._api, self._message))

        self._entities = entities
        return self._entities

    def filter(self, *types, exclude=False):
        """Get only some types of entities

        A list with the entities whose type is in ``types`` is returned; if
        ``exclude`` is True the entities whose type is *not* in ``types``
        are returned instead.
        """
        result = []
        for entity in self._calculate_entities():
            # If the entity type is in the allowed ones and exclude is False OR
            # if the entity type isn't in the allowed ones and exclude is True
            if (entity.type in types) ^ exclude:
                result.append(entity)

        return result

    # Provide a basic list-like interface; you can always mutate this object to
    # a list with list(self) if you need more advanced methods

    def __iter__(self):
        return iter(self._calculate_entities())

    def __getitem__(self, index):
        return self._calculate_entities()[index]

    def __contains__(self, key):
        # This checks if a given type is in the entities list
        # The calculated list must be used instead of the raw cache, because
        # the cache is None until the entities are calculated for the first
        # time (and every time a new message is attached)
        return any(
            entity.type == key for entity in self._calculate_entities()
        )
259+
260+
19261
class Message(BaseObject, mixins.MessageMixin):
20262
"""Telegram API representation of a message
21263
@@ -35,6 +277,7 @@ def from_(self):
35277
}
36278
optional = {
37279
"from": User,
280+
"entities": ParsedText,
38281
"forward_from": User,
39282
"forward_date": int,
40283
"reply_to_message": _itself,
@@ -62,8 +305,22 @@ def from_(self):
62305
}
63306
replace_keys = {
64307
"from": "sender",
308+
"entities": "parsed_text",
65309
}
66310

311+
def __init__(self, data, api=None):
    """Build the message and wire it to its parsed text"""
    super().__init__(data, api)

    # A message with some text always gets a parsed_text, even when no
    # entities were provided for it
    has_text = self.text is not None
    if has_text and self.parsed_text is None:
        self.parsed_text = ParsedText([], api, self)

    # The parsed text needs this Message instance in order to calculate
    # the content of each entity, so make sure it's attached
    if self.parsed_text is not None:
        self.parsed_text.set_message(self)
323+
67324
@property
68325
@utils.deprecated("Message.new_chat_participant", "1.0",
69326
"Rename property to Message.new_chat_member")

docs/api/telegram.rst

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,99 @@ about its business.
595595

596596
Now the method returns the sent message
597597

598+
.. py:class:: botogram.ParsedText
599+
600+
This class contains the parsed representation of the text of a received
601+
message. This allows you to work with the rich-formatted text the user sent,
602+
in addition to the plaintext provided by the :py:class:`~botogram.Message`
603+
class.
604+
605+
This class behaves as a list of :py:class:`~botogram.ParsedTextEntity`, so
606+
you can access its items as you would do with any other list (indexed
607+
access, iteration...), but it also provides some other utility tools.
608+
609+
.. versionadded:: 0.3
610+
611+
.. describe:: type in parsed
612+
613+
Check if a given entity type is contained in the message. For example,
614+
with the following code you can check if the user sent links in his
615+
message:
616+
617+
.. code-block:: python
618+
619+
if "url" in message.parsed_text:
620+
chat.send("Hey, you sent me a link!")
621+
622+
.. py:method:: filter(\*types, [exclude=False])
623+
624+
This method returns a list of all the
625+
:py:class:`~botogram.ParsedTextEntity` in a message of a given
626+
type. This allows you to get only some types of entities, and exclude the
627+
other ones in a simple way. You can also just **exclude** from the result
628+
the types you provide.
629+
630+
.. code-block:: python
631+
632+
# Get only the URLs
633+
urls = message.parsed_text.filter("url")
634+
635+
# Get usernames and hashtags
636+
usernames_hashtags = message.parsed_text.filter("mention", "hashtag")
637+
638+
# Exclude plaintext
639+
entities = message.parsed_text.filter("plain", exclude=True)
640+
641+
.. py:class:: botogram.ParsedTextEntity
642+
643+
This class represents a single entity contained in a text message.
644+
645+
.. versionadded:: 0.3
646+
647+
.. describe:: str(entity)
648+
649+
A handy alias for the :py:attr:`~botogram.ParsedTextEntity.text`
650+
attribute.
651+
652+
.. describe:: len(entity)
653+
654+
Return the length of the entity.
655+
656+
.. py:attribute:: type
657+
658+
The type of the entity. This can be one of those:
659+
660+
* **plain**: a plain string (with no formatting or special meaning)
661+
662+
* **mention**: a mention to another user (for example ``@pietroalbini``)
663+
664+
* **hashtag**: a hashtag (for example ``#pythonftw``)
665+
666+
* **command**: a command sent to a bot (for example ``/help``)
667+
668+
* **url**: a link (the text can contain its label)
669+
670+
* **email**: an email address (for example ``john@example.com``)
671+
672+
* **bold**: a bold-formatted text
673+
674+
* **italic**: an italic-formatted text
675+
676+
* **code**: a monospace-formatted text
677+
678+
* **pre**: a monospace-formatted block
679+
680+
.. py:attribute:: text
681+
682+
Return the plaintext content of the entity. In pair with the type you can
683+
recreate the original formatting of the entity.
684+
685+
.. py:attribute:: url
686+
687+
The attached URL for the entity. This includes the raw URL for the
688+
**url** and **text_link** types, the ``telegram.me`` link for the
689+
**mention** type, and the ``mailto:`` link for **email** type.
690+
598691
.. py:class:: botogram.Message
599692
600693
This class represents messages received by and sent from your bot. Messages
@@ -658,6 +751,15 @@ about its business.
658751

659752
*This attribute can be None if it's not provided by Telegram.*
660753

754+
.. py:attribute:: parsed_text
755+
756+
The :py:class:`~botogram.ParsedText` representation of the text of this
757+
message.
758+
759+
*This attribute is None if the text attribute is also None.*
760+
761+
.. versionadded:: 0.3
762+
661763
.. py:attribute:: audio
662764
663765
An :py:class:`~botogram.Audio` object, for when this message is an audio

0 commit comments

Comments
 (0)