Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/VahterBanBot.Tests/ContainerTestBase.fs
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,15 @@ type VahterTestContainers(mlEnabled: bool) =
.WithEnvironment("REACTION_SPAM_ENABLED", "true")
.WithEnvironment("REACTION_SPAM_MIN_MESSAGES", "3")
.WithEnvironment("REACTION_SPAM_MAX_REACTIONS", "5")
// Forward spam detection
.WithEnvironment("FORWARD_SPAM_DETECTION_ENABLED", "true")
.Build()
else
builder
.WithEnvironment("ML_ENABLED", "false")
.WithEnvironment("OCR_ENABLED", "false")
.WithEnvironment("REACTION_SPAM_ENABLED", "false")
.WithEnvironment("FORWARD_SPAM_DETECTION_ENABLED", "false")
.Build()

let startContainers() = task {
Expand Down
66 changes: 66 additions & 0 deletions src/VahterBanBot.Tests/MLBanTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -494,4 +494,70 @@ type MLBanTests(fixture: MlEnabledVahterTestContainers, _unused: MlAwaitFixture)
Assert.True(userBanned, "User should be auto-banned after reaching karma threshold via soft spam")
}

[<Fact>]
let ``Message with spam in quote text triggers auto-delete`` () = task {
let msgUpdate = Tg.quickMsg(
chat = fixture.ChatsToMonitor[0],
text = "hello",
quote = Tg.textQuote("2222222")
)
let! _ = fixture.SendMessage msgUpdate

let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

[<Fact>]
let ``Message with non-spam quote text does NOT trigger auto-delete`` () = task {
let msgUpdate = Tg.quickMsg(
chat = fixture.ChatsToMonitor[0],
text = "hello",
quote = Tg.textQuote("b")
)
let! _ = fixture.SendMessage msgUpdate

let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

[<Fact>]
let ``Quote text is prepended to message text`` () = task {
let msgUpdate = Tg.quickMsg(
chat = fixture.ChatsToMonitor[0],
text = "hello",
quote = Tg.textQuote("b")
)
let! _ = fixture.SendMessage msgUpdate

let! dbMsg = fixture.TryGetDbMessage msgUpdate.Message
Assert.True dbMsg.IsSome
Assert.Equal("b\nhello", dbMsg.Value.text)
}

[<Fact>]
let ``Spam in external reply photo triggers auto-delete via OCR`` () = task {
let msgUpdate = Tg.quickMsg(
chat = fixture.ChatsToMonitor[0],
text = "hello",
externalReply = Tg.externalReply(photos = [| Tg.spamPhoto |])
)
let! _ = fixture.SendMessage msgUpdate

let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

[<Fact>]
let ``Ham in external reply photo does NOT trigger auto-delete`` () = task {
let msgUpdate = Tg.quickMsg(
chat = fixture.ChatsToMonitor[0],
text = "hello",
externalReply = Tg.externalReply(photos = [| Tg.hamPhoto |])
)
let! _ = fixture.SendMessage msgUpdate

let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

interface IClassFixture<MlAwaitFixture>
15 changes: 13 additions & 2 deletions src/VahterBanBot.Tests/TgMessageUtils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,16 @@ type Tg() =
static member emoji(?offset: int) = MessageEntity(Type = MessageEntityType.CustomEmoji, Offset = defaultArg offset 0 , Length = 1)
static member emojies(n: int) = Array.init n (fun i -> Tg.emoji i)

static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string, ?editedText: string, ?entities: MessageEntity[], ?photos: PhotoSize[], ?isAutomaticForward: bool, ?senderChat: Chat) =
static member textQuote(text: string) =
TextQuote(Text = text, Position = 0)

static member externalReply(?photos: PhotoSize[], ?chat: Chat) =
ExternalReplyInfo(
Photo = (photos |> Option.defaultValue null),
Chat = (chat |> Option.defaultValue null)
)

static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string, ?editedText: string, ?entities: MessageEntity[], ?photos: PhotoSize[], ?isAutomaticForward: bool, ?senderChat: Chat, ?quote: TextQuote, ?externalReply: ExternalReplyInfo) =
let updateId = next()
let msgId = next()
Update(
Expand All @@ -77,7 +86,9 @@ type Tg() =
Entities = (entities |> Option.defaultValue null),
Photo = (photos |> Option.defaultValue null),
IsAutomaticForward = (isAutomaticForward |> Option.defaultValue false),
SenderChat = (senderChat |> Option.defaultValue null)
SenderChat = (senderChat |> Option.defaultValue null),
Quote = (quote |> Option.defaultValue null),
ExternalReply = (externalReply |> Option.defaultValue null)
),
EditedMessage =
if editedText |> Option.isSome then
Expand Down
152 changes: 108 additions & 44 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,96 @@ let onMessage
do! justMessage botUser botClient botConfig logger ml message
}

let private selectLargestPhoto (photos: PhotoSize array) =
let withSize = photos |> Array.filter (fun p -> p.FileSize.HasValue)
if withSize.Length > 0 then
withSize |> Array.maxBy (fun p -> p.FileSize.Value)
else
photos |> Array.maxBy (fun p -> p.Width * p.Height)

let private ocrPhotos
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(computerVision: IComputerVision)
(logger: ILogger)
(photos: PhotoSize array)
(messageId: int) = task {
let candidatePhotos =
photos
|> Array.filter (fun p ->
let size = int64 p.FileSize
size = 0L || size <= botConfig.OcrMaxFileSizeBytes)

if candidatePhotos.Length = 0 then
logger.LogWarning(
"No photos under OCR limit of {LimitBytes} bytes for message {MessageId}",
botConfig.OcrMaxFileSizeBytes,
messageId)
return None
else
let largestPhoto = selectLargestPhoto candidatePhotos

let! file = botClient.GetFile(largestPhoto.FileId)

if String.IsNullOrWhiteSpace file.FilePath then
logger.LogWarning("Failed to resolve file path for photo {PhotoId}", largestPhoto.FileId)
return None
else
let fileUrl = $"https://api.telegram.org/file/bot{botConfig.BotToken}/{file.FilePath}"
let! ocrText = computerVision.TextFromImageUrl fileUrl
if String.IsNullOrWhiteSpace ocrText then
return None
else
return Some ocrText
}

let tryEnrichMessageWithForwardedContent
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(computerVision: IComputerVision)
(logger: ILogger)
(update: Update) = task {
if botConfig.ForwardSpamDetectionEnabled then
let message = update.EditedOrMessage
if not (isNull message) && isMessageFromAllowedChats botConfig message then
use activity = botActivity.StartActivity("forwardedContentEnrichment")
try
let mutable forwardedText: string = null

if not (isNull message.Quote)
&& not (String.IsNullOrWhiteSpace message.Quote.Text) then
forwardedText <- message.Quote.Text
%activity.SetTag("quoteTextLength", message.Quote.Text.Length)

if botConfig.OcrEnabled
&& not (isNull message.ExternalReply)
&& not (isNull message.ExternalReply.Photo)
&& message.ExternalReply.Photo.Length > 0 then
let! ocrText = ocrPhotos botClient botConfig computerVision logger message.ExternalReply.Photo message.MessageId
match ocrText with
| Some text ->
forwardedText <-
if isNull forwardedText then text
else $"{forwardedText}\n{text}"
%activity.SetTag("externalReplyOcrLength", text.Length)
| None -> ()

if not (String.IsNullOrWhiteSpace forwardedText) then
let baseText = message.TextOrCaption
let enrichedText =
if String.IsNullOrWhiteSpace baseText then forwardedText
else $"{forwardedText}\n{baseText}"
logger.LogDebug(
"Enriched message {MessageId} with forwarded content of length {ForwardedLength}",
message.MessageId,
forwardedText.Length
)
message.Text <- enrichedText
%activity.SetTag("enrichedTextLength", enrichedText.Length)
with ex ->
logger.LogError(ex, "Failed to process forwarded content for message {MessageId}", update.EditedOrMessage.MessageId)
}

let tryEnrichMessageWithOcr
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
Expand All @@ -897,52 +987,25 @@ let tryEnrichMessageWithOcr
(update: Update) = task {
if botConfig.OcrEnabled then
let message = update.EditedOrMessage
if not (isNull message.Photo) && message.Photo.Length > 0 then
if not (isNull message.Photo) && message.Photo.Length > 0 && isMessageFromAllowedChats botConfig message then
use activity = botActivity.StartActivity("ocrEnrichment")
try
let candidatePhotos =
message.Photo
|> Array.filter (fun p ->
let size = int64 p.FileSize
size = 0L || size <= botConfig.OcrMaxFileSizeBytes)

if candidatePhotos.Length = 0 then
logger.LogWarning(
"No photos under OCR limit of {LimitBytes} bytes for message {MessageId}",
botConfig.OcrMaxFileSizeBytes,
message.MessageId)
else
let largestPhoto =
candidatePhotos
|> Seq.filter (fun p -> p.FileSize.HasValue)
|> Seq.maxBy (fun p -> p.FileSize.Value)

%activity.SetTag("photoId", largestPhoto.FileId)

let! file = botClient.GetFile(largestPhoto.FileId)

if String.IsNullOrWhiteSpace file.FilePath then
logger.LogWarning("Failed to resolve file path for photo {PhotoId}", largestPhoto.FileId)
else
let fileUrl = $"https://api.telegram.org/file/bot{botConfig.BotToken}/{file.FilePath}"
%activity.SetTag("fileUrl", fileUrl)
let! ocrText = computerVision.TextFromImageUrl fileUrl

if not (String.IsNullOrWhiteSpace ocrText) then
let baseText = message.TextOrCaption
let enrichedText =
if String.IsNullOrWhiteSpace baseText then
ocrText
else
$"{baseText}\n{ocrText}"
logger.LogDebug (
"Enriched message {MessageId} with OCR text {EnrichedText} of length {OcrTextLength}",
update.EditedOrMessage.MessageId,
enrichedText,
ocrText.Length
)
message.Text <- enrichedText
%activity.SetTag("ocrTextLength", enrichedText.Length)
let! ocrResult = ocrPhotos botClient botConfig computerVision logger message.Photo message.MessageId
match ocrResult with
| Some ocrText ->
let baseText = message.TextOrCaption
let enrichedText =
if String.IsNullOrWhiteSpace baseText then ocrText
else $"{baseText}\n{ocrText}"
logger.LogDebug (
"Enriched message {MessageId} with OCR text {EnrichedText} of length {OcrTextLength}",
update.EditedOrMessage.MessageId,
enrichedText,
ocrText.Length
)
message.Text <- enrichedText
%activity.SetTag("ocrTextLength", enrichedText.Length)
| None -> ()
with ex ->
logger.LogError(ex, "Failed to process OCR for message {MessageId}", update.EditedOrMessage.MessageId)
}
Expand Down Expand Up @@ -1222,6 +1285,7 @@ let onUpdate
elif update.MessageReaction <> null then
do! onMessageReaction botClient botConfig logger update.MessageReaction
elif update.EditedOrMessage <> null then
do! tryEnrichMessageWithForwardedContent botClient botConfig computerVision logger update
do! tryEnrichMessageWithOcr botClient botConfig computerVision logger update
do! onMessage botUser botClient botConfig logger ml update.EditedOrMessage
elif update.ChatMember <> null || update.MyChatMember <> null then
Expand Down
4 changes: 3 additions & 1 deletion src/VahterBanBot/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ let botConf =
// Reaction spam detection
ReactionSpamEnabled = getEnvOr "REACTION_SPAM_ENABLED" "false" |> bool.Parse
ReactionSpamMinMessages = getEnvOr "REACTION_SPAM_MIN_MESSAGES" "10" |> int
ReactionSpamMaxReactions = getEnvOr "REACTION_SPAM_MAX_REACTIONS" "5" |> int }
ReactionSpamMaxReactions = getEnvOr "REACTION_SPAM_MAX_REACTIONS" "5" |> int
// Forward spam detection
ForwardSpamDetectionEnabled = getEnvOr "FORWARD_SPAM_DETECTION_ENABLED" "true" |> bool.Parse }

let validateApiKey (ctx : HttpContext) =
match ctx.TryGetRequestHeader "X-Telegram-Bot-Api-Secret-Token" with
Expand Down
4 changes: 3 additions & 1 deletion src/VahterBanBot/Types.fs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ type BotConfiguration =
// Reaction spam detection
ReactionSpamEnabled: bool
ReactionSpamMinMessages: int
ReactionSpamMaxReactions: int }
ReactionSpamMaxReactions: int
// Forward spam detection
ForwardSpamDetectionEnabled: bool }

[<CLIMutable>]
type DbUser =
Expand Down