Skip to content

Commit fe89d6b

Browse files
committed
Add forward spam detection via quote text and external reply photo OCR
Spam forwarded as a "reply to external message" was bypassing ML because the bot only analyzed message.Text/Caption and message.Photo. Now we extract text from message.Quote and run OCR on message.ExternalReply.Photo, prepending the result before the original message text so the ML pipeline sees the full content. Feature-flagged via FORWARD_SPAM_DETECTION_ENABLED (default: true); the external reply photo OCR additionally requires OCR to be enabled (botConfig.OcrEnabled). Made-with: Cursor
1 parent 1dad254 commit fe89d6b

File tree

6 files changed

+175
-4
lines changed

6 files changed

+175
-4
lines changed

src/VahterBanBot.Tests/ContainerTestBase.fs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,15 @@ type VahterTestContainers(mlEnabled: bool) =
128128
.WithEnvironment("REACTION_SPAM_ENABLED", "true")
129129
.WithEnvironment("REACTION_SPAM_MIN_MESSAGES", "3")
130130
.WithEnvironment("REACTION_SPAM_MAX_REACTIONS", "5")
131+
// Forward spam detection
132+
.WithEnvironment("FORWARD_SPAM_DETECTION_ENABLED", "true")
131133
.Build()
132134
else
133135
builder
134136
.WithEnvironment("ML_ENABLED", "false")
135137
.WithEnvironment("OCR_ENABLED", "false")
136138
.WithEnvironment("REACTION_SPAM_ENABLED", "false")
139+
.WithEnvironment("FORWARD_SPAM_DETECTION_ENABLED", "false")
137140
.Build()
138141

139142
let startContainers() = task {

src/VahterBanBot.Tests/MLBanTests.fs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,4 +494,70 @@ type MLBanTests(fixture: MlEnabledVahterTestContainers, _unused: MlAwaitFixture)
494494
Assert.True(userBanned, "User should be auto-banned after reaching karma threshold via soft spam")
495495
}
496496

497+
/// A message whose own text is harmless but whose quote carries "2222222"
/// is expected to be auto-deleted.
/// NOTE(review): "2222222" is presumably a token the test ML model treats as spam — confirm against the fixture's training data.
[<Fact>]
let ``Message with spam in quote text triggers auto-delete`` () = task {
    let spamQuote = Tg.textQuote("2222222")
    let update = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "hello", quote = spamQuote)
    let! _ = fixture.SendMessage update

    let! wasAutoDeleted = fixture.MessageIsAutoDeleted update.Message
    Assert.True wasAutoDeleted
}
509+
510+
/// A message whose quote carries the text "b" must not be auto-deleted.
/// NOTE(review): "b" is presumably a token the test ML model treats as ham — confirm against the fixture's training data.
[<Fact>]
let ``Message with non-spam quote text does NOT trigger auto-delete`` () = task {
    let hamQuote = Tg.textQuote("b")
    let update = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "hello", quote = hamQuote)
    let! _ = fixture.SendMessage update

    let! wasAutoDeleted = fixture.MessageIsAutoDeleted update.Message
    Assert.False wasAutoDeleted
}
522+
523+
/// The stored message text must be the quote text, a newline, then the
/// original message text ("b\nhello").
[<Fact>]
let ``Quote text is prepended to message text`` () = task {
    let quotedPart = Tg.textQuote("b")
    let update = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "hello", quote = quotedPart)
    let! _ = fixture.SendMessage update

    let! storedMessage = fixture.TryGetDbMessage update.Message
    Assert.True storedMessage.IsSome
    Assert.Equal("b\nhello", storedMessage.Value.text)
}
536+
537+
/// A message replying to an external message whose photo is `Tg.spamPhoto`
/// is expected to be auto-deleted.
[<Fact>]
let ``Spam in external reply photo triggers auto-delete via OCR`` () = task {
    let replyWithSpamPhoto = Tg.externalReply(photos = [| Tg.spamPhoto |])
    let update = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "hello", externalReply = replyWithSpamPhoto)
    let! _ = fixture.SendMessage update

    let! wasAutoDeleted = fixture.MessageIsAutoDeleted update.Message
    Assert.True wasAutoDeleted
}
549+
550+
/// A message replying to an external message whose photo is `Tg.hamPhoto`
/// must not be auto-deleted.
[<Fact>]
let ``Ham in external reply photo does NOT trigger auto-delete`` () = task {
    let replyWithHamPhoto = Tg.externalReply(photos = [| Tg.hamPhoto |])
    let update = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "hello", externalReply = replyWithHamPhoto)
    let! _ = fixture.SendMessage update

    let! wasAutoDeleted = fixture.MessageIsAutoDeleted update.Message
    Assert.False wasAutoDeleted
}
562+
497563
interface IClassFixture<MlAwaitFixture>

src/VahterBanBot.Tests/TgMessageUtils.fs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,16 @@ type Tg() =
6060
static member emoji(?offset: int) = MessageEntity(Type = MessageEntityType.CustomEmoji, Offset = defaultArg offset 0 , Length = 1)
6161
static member emojies(n: int) = Array.init n (fun i -> Tg.emoji i)
6262

63-
static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string, ?editedText: string, ?entities: MessageEntity[], ?photos: PhotoSize[], ?isAutomaticForward: bool, ?senderChat: Chat) =
63+
/// Builds a `TextQuote` carrying `text`, positioned at offset 0.
static member textQuote(text: string) =
    TextQuote(
        Position = 0,
        Text = text
    )
65+
66+
/// Builds an `ExternalReplyInfo` for tests.
/// `photos` and `chat` are optional; an omitted argument leaves the
/// corresponding property set to null.
static member externalReply(?photos: PhotoSize[], ?chat: Chat) =
    let replyPhotos = defaultArg photos null
    let replyChat = defaultArg chat null
    ExternalReplyInfo(Photo = replyPhotos, Chat = replyChat)
71+
72+
static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string, ?editedText: string, ?entities: MessageEntity[], ?photos: PhotoSize[], ?isAutomaticForward: bool, ?senderChat: Chat, ?quote: TextQuote, ?externalReply: ExternalReplyInfo) =
6473
let updateId = next()
6574
let msgId = next()
6675
Update(
@@ -77,7 +86,9 @@ type Tg() =
7786
Entities = (entities |> Option.defaultValue null),
7887
Photo = (photos |> Option.defaultValue null),
7988
IsAutomaticForward = (isAutomaticForward |> Option.defaultValue false),
80-
SenderChat = (senderChat |> Option.defaultValue null)
89+
SenderChat = (senderChat |> Option.defaultValue null),
90+
Quote = (quote |> Option.defaultValue null),
91+
ExternalReply = (externalReply |> Option.defaultValue null)
8192
),
8293
EditedMessage =
8394
if editedText |> Option.isSome then

src/VahterBanBot/Bot.fs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,92 @@ let onMessage
889889
do! justMessage botUser botClient botConfig logger ml message
890890
}
891891

892+
/// Picks one photo out of `photos`, resolves its download URL through the bot
/// client and runs it through `computerVision`, returning the recognised text.
///
/// Photos whose reported size exceeds `botConfig.OcrMaxFileSizeBytes` are
/// skipped; photos that report no size at all are kept (their size is treated
/// as 0). Among the remaining photos the one with the largest reported size is
/// used.
///
/// Returns `None` when no photo qualifies, when the file path cannot be
/// resolved, or when OCR yields only whitespace. `messageId` is used for
/// logging only.
let private ocrPhotos
    (botClient: ITelegramBotClient)
    (botConfig: BotConfiguration)
    (computerVision: IComputerVision)
    (logger: ILogger)
    (photos: PhotoSize array)
    (messageId: int) = task {
    // FileSize is nullable. Read a missing value as 0 instead of converting
    // directly, which would throw when the size is absent.
    let reportedSize (photo: PhotoSize) = int64 (photo.FileSize.GetValueOrDefault())

    let candidatePhotos =
        photos
        |> Array.filter (fun photo ->
            let size = reportedSize photo
            size = 0L || size <= botConfig.OcrMaxFileSizeBytes)

    if candidatePhotos.Length = 0 then
        logger.LogWarning(
            "No photos under OCR limit of {LimitBytes} bytes for message {MessageId}",
            botConfig.OcrMaxFileSizeBytes,
            messageId)
        return None
    else
        // candidatePhotos is non-empty here, so maxBy cannot fail even when
        // none of the candidates reports a size (all compare as 0).
        let largestPhoto = candidatePhotos |> Array.maxBy reportedSize

        let! file = botClient.GetFile(largestPhoto.FileId)

        if String.IsNullOrWhiteSpace file.FilePath then
            logger.LogWarning("Failed to resolve file path for photo {PhotoId}", largestPhoto.FileId)
            return None
        else
            let fileUrl = $"https://api.telegram.org/file/bot{botConfig.BotToken}/{file.FilePath}"
            let! ocrText = computerVision.TextFromImageUrl fileUrl
            if String.IsNullOrWhiteSpace ocrText then
                return None
            else
                return Some ocrText
}
930+
931+
/// Prepends forwarded content to the text of `update.EditedOrMessage`:
/// the quote text (`message.Quote.Text`) and, when `botConfig.OcrEnabled` is
/// set, the text recognised in `message.ExternalReply.Photo`.
///
/// The resulting text is `quote`, then the OCR text, then the original
/// `TextOrCaption`, separated by newlines; it is written to `message.Text`,
/// mutating the message in place.
///
/// Does nothing when `botConfig.ForwardSpamDetectionEnabled` is false or the
/// update carries no message. Exceptions are logged and never propagated.
/// NOTE(review): prepending text shifts the original text relative to any
/// offsets in `message.Entities` — confirm nothing downstream relies on them.
let tryEnrichMessageWithForwardedContent
    (botClient: ITelegramBotClient)
    (botConfig: BotConfiguration)
    (computerVision: IComputerVision)
    (logger: ILogger)
    (update: Update) = task {
    if botConfig.ForwardSpamDetectionEnabled then
        let message = update.EditedOrMessage
        if not (isNull message) then
            use activity = botActivity.StartActivity("forwardedContentEnrichment")
            // StartActivity returns null when no listener is interested in the
            // activity, so tagging must be null-safe; otherwise the resulting
            // exception would abort enrichment.
            let setTag (key: string) (value: int) =
                if not (isNull activity) then
                    %activity.SetTag(key, value)
            try
                let quoteText =
                    if not (isNull message.Quote)
                       && not (String.IsNullOrWhiteSpace message.Quote.Text) then
                        Some message.Quote.Text
                    else
                        None
                quoteText |> Option.iter (fun text -> setTag "quoteTextLength" text.Length)

                let hasExternalReplyPhoto =
                    botConfig.OcrEnabled
                    && not (isNull message.ExternalReply)
                    && not (isNull message.ExternalReply.Photo)
                    && message.ExternalReply.Photo.Length > 0

                // OCR failures are contained here so that quote text that was
                // already extracted is still applied to the message.
                let! externalReplyText = task {
                    if hasExternalReplyPhoto then
                        try
                            return! ocrPhotos botClient botConfig computerVision logger message.ExternalReply.Photo message.MessageId
                        with ex ->
                            logger.LogError(ex, "Failed to OCR external reply photo for message {MessageId}", message.MessageId)
                            return None
                    else
                        return None
                }
                externalReplyText |> Option.iter (fun text -> setTag "externalReplyOcrLength" text.Length)

                // Both parts are non-blank when present, so a non-empty list
                // always yields non-blank forwarded text.
                let forwardedParts = [ quoteText; externalReplyText ] |> List.choose id
                if not (List.isEmpty forwardedParts) then
                    let forwardedText = String.concat "\n" forwardedParts
                    let baseText = message.TextOrCaption
                    let enrichedText =
                        if String.IsNullOrWhiteSpace baseText then forwardedText
                        else $"{forwardedText}\n{baseText}"
                    logger.LogDebug(
                        "Enriched message {MessageId} with forwarded content of length {ForwardedLength}",
                        message.MessageId,
                        forwardedText.Length
                    )
                    message.Text <- enrichedText
                    setTag "enrichedTextLength" enrichedText.Length
            with ex ->
                logger.LogError(ex, "Failed to process forwarded content for message {MessageId}", update.EditedOrMessage.MessageId)
}
977+
892978
let tryEnrichMessageWithOcr
893979
(botClient: ITelegramBotClient)
894980
(botConfig: BotConfiguration)
@@ -1222,6 +1308,7 @@ let onUpdate
12221308
elif update.MessageReaction <> null then
12231309
do! onMessageReaction botClient botConfig logger update.MessageReaction
12241310
elif update.EditedOrMessage <> null then
1311+
do! tryEnrichMessageWithForwardedContent botClient botConfig computerVision logger update
12251312
do! tryEnrichMessageWithOcr botClient botConfig computerVision logger update
12261313
do! onMessage botUser botClient botConfig logger ml update.EditedOrMessage
12271314
elif update.ChatMember <> null || update.MyChatMember <> null then

src/VahterBanBot/Program.fs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,9 @@ let botConf =
101101
// Reaction spam detection
102102
ReactionSpamEnabled = getEnvOr "REACTION_SPAM_ENABLED" "false" |> bool.Parse
103103
ReactionSpamMinMessages = getEnvOr "REACTION_SPAM_MIN_MESSAGES" "10" |> int
104-
ReactionSpamMaxReactions = getEnvOr "REACTION_SPAM_MAX_REACTIONS" "5" |> int }
104+
ReactionSpamMaxReactions = getEnvOr "REACTION_SPAM_MAX_REACTIONS" "5" |> int
105+
// Forward spam detection
106+
ForwardSpamDetectionEnabled = getEnvOr "FORWARD_SPAM_DETECTION_ENABLED" "true" |> bool.Parse }
105107

106108
let validateApiKey (ctx : HttpContext) =
107109
match ctx.TryGetRequestHeader "X-Telegram-Bot-Api-Secret-Token" with

src/VahterBanBot/Types.fs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ type BotConfiguration =
6060
// Reaction spam detection
6161
ReactionSpamEnabled: bool
6262
ReactionSpamMinMessages: int
63-
ReactionSpamMaxReactions: int }
63+
ReactionSpamMaxReactions: int
64+
// Forward spam detection
65+
ForwardSpamDetectionEnabled: bool }
6466

6567
[<CLIMutable>]
6668
type DbUser =

0 commit comments

Comments
 (0)