Skip to content

Commit 5103c8f

Browse files
authored
fix: telegram multiformat (#89)
This PR adds:

- Support for messages that contain special objects such as links, phone numbers, email addresses, or formatting. For such messages `message.content` can be a list, which raised an error when the content was checked for type string.
- Support for chats with deleted accounts, by substituting the value "Deleted Account" when the `from` field is empty. Without this, `map_ai_messages()` cannot determine the sender, so no `AIMessage` is produced.

Reproducing the bug:

1. Export a Telegram chat that contains links or other special formatting (such as bold text) in JSON format.
2. Import and run `TelegramChatLoader()` - it throws the error `Chat Loaders only support messages with content type string, got ...`
1 parent 84d2026 commit 5103c8f

File tree

4 files changed

+74
-0
lines changed

4 files changed

+74
-0
lines changed

libs/community/langchain_community/chat_loaders/telegram.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ def _load_single_chat_session_json(file_path: str) -> ChatSession:
102102
text = message.get("text", "")
103103
timestamp = message.get("date", "")
104104
from_name = message.get("from", "")
105+
if from_name is None:
106+
from_name = "Deleted Account"
105107

106108
results.append(
107109
HumanMessage(

libs/community/langchain_community/chat_loaders/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ def merge_chat_runs_in_session(
2222
"""
2323
messages: List[BaseMessage] = []
2424
for message in chat_session["messages"]:
25+
if isinstance(message.content, list):
26+
text = ""
27+
for content in message.content:
28+
if isinstance(content, dict):
29+
text += content.get("text", None)
30+
else:
31+
text += content
32+
message.content = text
2533
if not isinstance(message.content, str):
2634
raise ValueError(
2735
"Chat Loaders only support messages with content type string, "

libs/community/tests/unit_tests/chat_loaders/data/telegram_chat_json/result.json

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,56 @@
6262
"text": "you will not trick me this time"
6363
}
6464
]
65+
},
66+
{
67+
"id": 5,
68+
"type": "message",
69+
"date": "2023-08-23T13:16:20",
70+
"date_unixtime": "1692821780",
71+
"from": "Batman & Robin",
72+
"from_id": "user6565661032",
73+
"text": [
74+
"this is bold text: ",
75+
{
76+
"type": "bold",
77+
"text": "BOLD TEXT"
78+
}
79+
],
80+
"text_entities": [
81+
{
82+
"type": "plain",
83+
"text": "this is bold text: "
84+
},
85+
{
86+
"type": "bold",
87+
"text": "BOLD TEXT"
88+
}
89+
]
90+
},
91+
{
92+
"id": 6,
93+
"type": "message",
94+
"date": "2023-08-23T13:17:10",
95+
"date_unixtime": "1692821830",
96+
"from": "Jimmeny Marvelton",
97+
"from_id": "user123450513",
98+
"text": [
99+
"this is an email: ",
100+
{
101+
"type": "email",
102+
"text": "[email protected]"
103+
}
104+
],
105+
"text_entities": [
106+
{
107+
"type": "plain",
108+
"text": "this is an email: "
109+
},
110+
{
111+
"type": "email",
112+
"text": "[email protected]"
113+
}
114+
]
65115
}
66116
]
67117
}

libs/community/tests/unit_tests/chat_loaders/test_telegram.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,20 @@ def _check_telegram_chat_loader(path: str) -> None:
6969
"events": [{"message_time": "23.08.2023 13:15:35 UTC-08:00"}],
7070
},
7171
),
72+
AIMessage(
73+
content="this is bold text: BOLD TEXT",
74+
additional_kwargs={
75+
"sender": "Batman & Robin",
76+
"events": [{"message_time": "23.08.2023 13:16:20 UTC-08:00"}],
77+
},
78+
),
79+
HumanMessage(
80+
content="this is an email: [email protected]",
81+
additional_kwargs={
82+
"sender": "Jimmeny Marvelton",
83+
"events": [{"message_time": "23.08.2023 13:17:10 UTC-08:00"}],
84+
},
85+
),
7286
]
7387
_assert_messages_are_equal(session["messages"], expected_content)
7488

0 commit comments

Comments
 (0)