|
15 | 15 | import langbot_plugin.api.entities.builtin.platform.entities as platform_entities |
16 | 16 |
|
17 | 17 |
|
def split_string_by_bytes(text: str, limit: int = 2048, encoding: str = 'utf-8') -> list[str]:
    """Split a string into consecutive parts of at most ``limit`` encoded bytes.

    The split is greedy and always falls between code points, so no character
    is ever cut in half and joining the returned parts reproduces ``text``
    exactly. The size of a part is the length of ``part.encode(encoding)``,
    which includes any per-string prefix the codec emits (for example the
    BOM written by ``utf-16`` or ``utf-8-sig``).

    Args:
        text (str): The original string to split.
        limit (int): The maximum encoded size, in bytes, of each part.
            Must be at least 1.
        encoding (str): The encoding used to measure sizes (default 'utf-8').

    Returns:
        list: The parts in order; an empty list when ``text`` is empty.
            If a single character alone encodes to more than ``limit``
            bytes it is returned as its own (oversized) part rather than
            being dropped.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
        LookupError: If ``encoding`` is not a known codec.
        UnicodeEncodeError: If ``text`` cannot be encoded with ``encoding``.
    """
    # Function-scope import keeps this helper self-contained.
    import codecs

    if limit < 1:
        raise ValueError(f'limit must be a positive number of bytes, got {limit!r}')

    new_encoder = codecs.getincrementalencoder(encoding)

    parts = []
    current = []  # characters collected for the part being built
    used = 0  # encoded size of ``current`` so far
    # A fresh incremental encoder per part makes the measured size match
    # what ``part.encode(encoding)`` would produce, BOM included.
    encoder = new_encoder()

    for char in text:
        size = len(encoder.encode(char))

        # Close the current part when this character would overflow it.
        # The ``current`` check guarantees progress: a character that is
        # larger than ``limit`` on its own still lands in a part by itself
        # instead of stalling the loop or being discarded.
        if current and used + size > limit:
            parts.append(''.join(current))
            current = []
            used = 0
            encoder = new_encoder()
            # Re-measure: as the first character of a new part it may
            # carry the codec's prefix bytes.
            size = len(encoder.encode(char))

        current.append(char)
        used += size

    if current:
        parts.append(''.join(current))

    return parts
| 68 | + |
| 69 | + |
18 | 70 | class WecomMessageConverter(abstract_platform_adapter.AbstractMessageConverter): |
19 | 71 | @staticmethod |
20 | 72 | async def yiri2target(message_chain: platform_message.MessageChain, bot: WecomClient): |
21 | 73 | content_list = [] |
22 | 74 |
|
23 | 75 | for msg in message_chain: |
24 | 76 | if type(msg) is platform_message.Plain: |
25 | | - content_list.append( |
| 77 | + chunks = split_string_by_bytes(msg.text) |
| 78 | + content_list.extend([ |
26 | 79 | { |
27 | 80 | 'type': 'text', |
28 | | - 'content': msg.text, |
| 81 | + 'content': chunk, |
29 | 82 | } |
30 | | - ) |
| 83 | + for chunk in chunks |
| 84 | + ]) |
31 | 85 | elif type(msg) is platform_message.Image: |
32 | 86 | content_list.append( |
33 | 87 | { |
|
0 commit comments