Skip to content

Commit a9b8af8

Browse files
committed
(#322) Telegram: do not split parts of surrogate pairs during flattening
1 parent 1de0fec commit a9b8af8

File tree

2 files changed

+36
-7
lines changed

2 files changed

+36
-7
lines changed

Emulsion.Telegram/Funogram.fs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2024 Emulsion contributors <https://github.com/codingteam/emulsion>
1+
// SPDX-FileCopyrightText: 2025 Emulsion contributors <https://github.com/codingteam/emulsion>
22
//
33
// SPDX-License-Identifier: MIT
44

@@ -105,16 +105,30 @@ module MessageConverter =
105105
pos <- linkEndOffset
106106
result.Append(text.Substring(pos, text.Length - pos)).ToString()
107107

108-
let private applyLimits limits text =
108+
let private applyLimits limits (text: string) =
109109
let applyMessageLengthLimit (original: {| text: string; wasLimited: bool |}) =
110110
match limits.messageLengthLimit with
111111
| None -> original
112112
| Some limit when original.text.Length <= limit -> original
113113
| Some limit ->
114-
let newText = original.text.Substring(0,
115-
Math.Clamp(limit - limits.dataRedactedMessage.Length,
116-
0,
117-
original.text.Length))
114+
assert (limit >= limits.dataRedactedMessage.Length)
115+
116+
let mutable newTextLength = Math.Clamp(
117+
limit - limits.dataRedactedMessage.Length,
118+
0,
119+
original.text.Length
120+
)
121+
122+
// We should never split surrogate pairs present in the initial message. So, if the truncated text ends with
123+
// the high part of such a pair, cut one more character to remove that part of the pair as well.
124+
//
125+
// Technically, this will also strip a part of an invalid Unicode sequence if the message originally
126+
// contained such an orphan high surrogate that is not followed by a low surrogate. But we don't
127+
// care.
128+
// Inspect the string that is actually being truncated (original.text, the same one passed to Clamp and
// Substring), not the outer `text` parameter; newTextLength is clamped to original.text.Length, so the
// index newTextLength - 1 is always valid for it.
if newTextLength > 0 && Char.IsHighSurrogate(original.text[newTextLength - 1]) then
129+
newTextLength <- newTextLength - 1
130+
131+
let newText = original.text.Substring(0, newTextLength)
118132
{| text = newText; wasLimited = true |}
119133

120134
let applyLineLimit (original: {| text: string; wasLimited: bool |}) =

Emulsion.Tests/Telegram/FunogramTests.fs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2024 Emulsion contributors <https://github.com/codingteam/emulsion>
1+
// SPDX-FileCopyrightText: 2025 Emulsion contributors <https://github.com/codingteam/emulsion>
22
//
33
// SPDX-License-Identifier: MIT
44

@@ -736,6 +736,21 @@ module FlattenMessageTests =
736736
flattenMessage replyMessage
737737
)
738738

739+
[<Fact>]
740+
let ``Flattening should not split surrogate pairs``() =
741+
let originalMessage = authoredTelegramMessage "@originalUser" "🐙🐙🐙🐙"
742+
let limit = 6
743+
let replyMessage = authoredTelegramReplyMessage "@replyingUser" "Reply text" originalMessage.main
744+
let flattener = MessageConverter.flatten {
745+
MessageConverter.DefaultQuoteSettings with
746+
limits.messageLengthLimit = Some limit
747+
}
748+
let flattened = flattener replyMessage
749+
Assert.Equal(
750+
Authored { author = "@replyingUser"; text = ">> <@originalUser> 🐙[…]\n\nReply text" },
751+
flattened
752+
)
753+
739754
[<Fact>]
740755
let flattenReplyEventMessage() =
741756
let originalMessage = eventTelegramMessage "@originalUser has entered the chat"

0 commit comments

Comments
 (0)