Skip to content

Commit 1cd5866

Browse files
njzjz and Oracle Public Cloud User
authored
fix: split Wecom messages exceeding 2048-byte limit (#1901)
Co-authored-by: Oracle Public Cloud User <opc@arm1.subnet.vcn.oraclevcn.com>
1 parent 45bedbe commit 1cd5866

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

src/langbot/libs/wecom_api/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ async def send_private_msg(self, user_id: str, agent_id: int, content: str):
197197
self.access_token = await self.get_access_token(self.secret)
198198

199199
url = self.base_url + '/message/send?access_token=' + self.access_token
200-
async with httpx.AsyncClient() as client:
200+
async with httpx.AsyncClient(timeout=None) as client:
201201
params = {
202202
'touser': user_id,
203203
'msgtype': 'text',

src/langbot/pkg/platform/sources/wecom.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,73 @@
1515
import langbot_plugin.api.entities.builtin.platform.entities as platform_entities
1616

1717

18+
def split_string_by_bytes(text, limit=2048, encoding='utf-8'):
    """
    Split a string into consecutive parts whose encoded size fits within 'limit' bytes.

    Parts are packed greedily and never cut a character in half, so
    ''.join(result) always equals 'text' (no character is ever dropped).

    Args:
        text (str): The original string to split.
        limit (int): The maximum encoded byte size for each split part.
        encoding (str): The encoding used to measure size (default is 'utf-8').

    Returns:
        list: The split strings in order; an empty list for an empty 'text'.
            If a single character alone encodes to more than 'limit' bytes,
            it is returned as its own (oversized) part rather than being
            discarded.

    Raises:
        ValueError: If 'limit' is smaller than 1.
        UnicodeEncodeError: If 'text' cannot be encoded with 'encoding'.
    """
    # Local import keeps this helper self-contained.
    import codecs

    if limit < 1:
        raise ValueError('limit must be a positive number of bytes')

    # An incremental encoder reports the byte cost of each character as it is
    # appended to the current part. For BOM-prefixed encodings (e.g. utf-16)
    # the BOM is charged once per part, matching part.encode(encoding).
    new_encoder = codecs.getincrementalencoder(encoding)

    parts = []
    current_chars = []
    current_size = 0
    encoder = new_encoder()

    for char in text:
        char_size = len(encoder.encode(char))

        # The character does not fit: close the current part and start a new
        # one. The 'current_chars' guard guarantees progress even when one
        # character is wider than 'limit' (it then forms a part by itself),
        # which avoids both an endless loop and silent data loss.
        if current_chars and current_size + char_size > limit:
            parts.append(''.join(current_chars))
            current_chars = []
            current_size = 0
            # Re-measure with a fresh encoder so per-part prefixes are counted.
            encoder = new_encoder()
            char_size = len(encoder.encode(char))

        current_chars.append(char)
        current_size += char_size

    if current_chars:
        parts.append(''.join(current_chars))

    return parts
68+
69+
1870
class WecomMessageConverter(abstract_platform_adapter.AbstractMessageConverter):
1971
@staticmethod
2072
async def yiri2target(message_chain: platform_message.MessageChain, bot: WecomClient):
2173
content_list = []
2274

2375
for msg in message_chain:
2476
if type(msg) is platform_message.Plain:
25-
content_list.append(
77+
chunks = split_string_by_bytes(msg.text)
78+
content_list.extend([
2679
{
2780
'type': 'text',
28-
'content': msg.text,
81+
'content': chunk,
2982
}
30-
)
83+
for chunk in chunks
84+
])
3185
elif type(msg) is platform_message.Image:
3286
content_list.append(
3387
{

0 commit comments

Comments
 (0)