fix: split Wecom messages exceeding 2048-byte limit (#1901)

njzjz · Oracle Public Cloud User · web-flow · commit 1cd586634dff · 2026-01-05T15:04:46.000+08:00
Co-authored-by: Oracle Public Cloud User <opc@arm1.subnet.vcn.oraclevcn.com>
diff --git a/src/langbot/libs/wecom_api/api.py b/src/langbot/libs/wecom_api/api.py
@@ -197,7 +197,7 @@ async def send_private_msg(self, user_id: str, agent_id: int, content: str):
             self.access_token = await self.get_access_token(self.secret)
 
         url = self.base_url + '/message/send?access_token=' + self.access_token
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=None) as client:
             params = {
                 'touser': user_id,
                 'msgtype': 'text',
diff --git a/src/langbot/pkg/platform/sources/wecom.py b/src/langbot/pkg/platform/sources/wecom.py
@@ -15,19 +15,73 @@
 import langbot_plugin.api.entities.builtin.platform.entities as platform_entities
 
 
+def split_string_by_bytes(text, limit=2048, encoding='utf-8'):
+    """
+    Split a string into consecutive parts whose encoded size is at most 'limit' bytes.
+
+    Parts are cut greedily and only on character boundaries, so a multi-byte
+    character is never torn in half and, for any text whose characters each
+    fit within 'limit' bytes, joining the parts reproduces the input.
+
+    The split is best-effort: a character whose own encoding is larger than
+    'limit' bytes cannot be placed in any part and is skipped, so a
+    non-positive 'limit' yields an empty list.
+
+    Args:
+        text (str): The original string to split.
+        limit (int): The maximum byte size for each split part.
+        encoding (str): The encoding to use (default is 'utf-8').
+
+    Returns:
+        list: The non-empty parts in order; an empty list for empty text.
+    """
+    # A negative limit would turn the slices below into "all but the last n"
+    # slices; clamp it so that it simply means "nothing fits".
+    budget = max(limit, 0)
+    parts = []
+    pos = 0  # index (in characters, not bytes) of the next unconsumed character
+
+    while pos < len(text):
+        # Every character encodes to at least one byte, so a window of
+        # 'budget' characters always covers at least 'budget' bytes of input.
+        window = text[pos:pos + budget]
+
+        # Encoding each window separately (instead of slicing one big byte
+        # string) keeps any per-string prefix such as a BOM counted against
+        # the limit of every part.
+        encoded = window.encode(encoding)[:budget]
+
+        # errors='ignore' drops the bytes of a character cut off by the slice,
+        # leaving the longest whole-character prefix that fits.
+        part_str = encoded.decode(encoding, errors='ignore')
+
+        if not part_str:
+            # The next character alone exceeds the limit. Skip exactly that
+            # character so the loop always makes progress, even at the end.
+            pos += 1
+            continue
+
+        parts.append(part_str)
+        pos += len(part_str)
+
+    return parts
+
+
 class WecomMessageConverter(abstract_platform_adapter.AbstractMessageConverter):
     @staticmethod
     async def yiri2target(message_chain: platform_message.MessageChain, bot: WecomClient):
         content_list = []
 
         for msg in message_chain:
             if type(msg) is platform_message.Plain:
-                content_list.append(
+                chunks = split_string_by_bytes(msg.text)
+                content_list.extend([
                     {
                         'type': 'text',
-                        'content': msg.text,
+                        'content': chunk,
                     }
-                )
+                    for chunk in chunks
+                ])
             elif type(msg) is platform_message.Image:
                 content_list.append(
                     {