Skip to content

Commit d1c22c0

Browse files
committed
add a new field to track the origin of training data
1 parent 1fdfd9a commit d1c22c0

File tree

10 files changed

+47
-5
lines changed

10 files changed

+47
-5
lines changed

app/controllers/trained_messages_controller.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ def index
1717
@trained_messages = @trained_messages.where(group_name: params[:group_name])
1818
end
1919

20+
if params[:source].present? && params[:source] != "all"
21+
@trained_messages = @trained_messages.where(source: params[:source])
22+
end
23+
2024
if params[:search].present?
2125
@trained_messages = @trained_messages.where("message LIKE ?", "%#{params[:search]}%")
2226
end
@@ -41,12 +45,13 @@ def index
4145
@total_pages = (@total_count.to_f / @per_page).ceil
4246

4347
# Using unscoped ensures we get all possible options, not just the filtered ones.
44-
filter_data = TrainedMessage.unscoped.distinct.pluck(:message_type, :training_target, :group_name)
48+
filter_data = TrainedMessage.unscoped.distinct.pluck(:message_type, :training_target, :group_name, :source)
4549

4650
# Get filter options
4751
@message_types = filter_data.map(&:first).uniq.compact.sort
4852
@training_targets = filter_data.map(&:second).uniq.compact.sort
4953
@group_names = filter_data.map(&:third).uniq.compact.sort
54+
@sources = filter_data.map(&:fourth).uniq.compact.sort
5055
end
5156

5257
# GET /trained_messages/1 or /trained_messages/1.json

app/models/trained_message.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ class TrainedMessage < ApplicationRecord
22
enum :message_type, { spam: 0, ham: 1, untrained: 2, maybe_spam: 3, maybe_ham: 4 }
33
# New enum for what is being trained
44
enum :training_target, { message_content: 0, user_name: 1 }
5+
enum :source, { chat: 0, feedspam_command: 1, import: 2 }
56
module MessageType
67
SPAM = "spam"
78
HAM = "ham"
@@ -14,6 +15,11 @@ module TrainingTarget
1415
MESSAGE_CONTENT = "message_content"
1516
USER_NAME = "user_name"
1617
end
18+
# String constants matching the keys of the `source` enum declared on this model.
module Source
19+
CHAT = "chat"
20+
FEEDSPAM_COMMAND = "feedspam_command"
21+
IMPORT = "import"
22+
end
1723
GLOBAL_SHARED_MESSAGE = 0
1824

1925
scope :shared, -> { where(group_id: GLOBAL_SHARED_MESSAGE) }
@@ -45,7 +51,7 @@ def should_ban_user
4551
end
4652

4753
spam_ban_threshold = Rails.application.config.spam_ban_threshold
48-
spam_count = TrainedMessage.where(group_id: self.group_id, sender_chat_id: self.sender_chat_id, message_type: :spam).count
54+
spam_count = TrainedMessage.where(group_id: self.group_id, sender_chat_id: self.sender_chat_id, message_type: :spam, source: :chat).count
4955
chat_member = TelegramMemberFetcher.get_bot_chat_member(self.group_id)
5056
can_ban_user = [ "administrator", "creator" ].include?(chat_member&.status) && chat_member&.can_restrict_members
5157
if spam_count >= spam_ban_threshold && can_ban_user

app/services/telegram_botter.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ def execute_spam_training(bot, message, spam_text)
283283
message: spam_text,
284284
sender_chat_id: message.from.id,
285285
sender_user_name: user_name,
286-
message_type: :maybe_spam
286+
message_type: :maybe_spam,
287+
source: :feedspam_command
287288
)
288289

289290
# Show a preview of what was learned (truncated if too long)

app/views/trained_messages/_form.html.erb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@
3737
{ class: ["block shadow-sm rounded-md border px-3 py-2 mt-2 w-full", {"border-gray-400 focus:outline-blue-600": trained_message.errors[:training_target].none?, "border-red-400 focus:outline-red-600": trained_message.errors[:training_target].any?}] } %>
3838
</div>
3939

40+
<%# Source selector: lists every `source` enum key and preselects the record's current value. %>
<div class="my-5">
41+
<%= form.label :source %>
42+
<%= form.select :source,
43+
options_for_select([['Chat', 'chat'], ['Feedspam Command', 'feedspam_command'], ['Import', 'import']], trained_message.source),
44+
{ prompt: 'Select source' },
45+
{ class: ["block shadow-sm rounded-md border px-3 py-2 mt-2 w-full", {"border-gray-400 focus:outline-blue-600": trained_message.errors[:source].none?, "border-red-400 focus:outline-red-600": trained_message.errors[:source].any?}] } %>
46+
</div>
47+
4048
<div class="my-5">
4149
<%= form.label :sender_chat_id %>
4250
<%= form.number_field :sender_chat_id, class: ["block shadow-sm rounded-md border px-3 py-2 mt-2 w-full", {"border-gray-400 focus:outline-blue-600": trained_message.errors[:sender_chat_id].none?, "border-red-400 focus:outline-red-600": trained_message.errors[:sender_chat_id].any?}] %>

app/views/trained_messages/_trained_message.html.erb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
<strong class="block font-medium mb-1">Training target:</strong>
2020
<%= trained_message.training_target %>
2121
</div>
22+
<div>
23+
<strong class="block font-medium mb-1">Source:</strong>
24+
<%= trained_message.source %>
25+
</div>
2226
<div>
2327
<strong class="block font-medium mb-1">Sender chat id:</strong>
2428
<%= trained_message.sender_chat_id %>

app/views/trained_messages/index.html.erb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@
4545
{ class: "w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500" } %>
4646
</div>
4747

48+
<!-- Source Filter -->
49+
<div>
50+
<%= form.label :source, "Source", class: "block text-sm font-medium text-gray-700 mb-1" %>
51+
<%= form.select :source,
52+
options_for_select([['All Sources', 'all']] + @sources.map { |source| [source&.humanize || 'Unknown', source] }, params[:source]),
53+
{},
54+
{ class: "w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500" } %>
55+
</div>
56+
4857
<!-- Group Filter -->
4958
<div>
5059
<%= form.label :group_name, "Group", class: "block text-sm font-medium text-gray-700 mb-1" %>
@@ -130,6 +139,7 @@
130139
<% end %>
131140
<% end %>
132141
</th>
142+
133143
<th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
134144
<%= link_to trained_messages_path(params.permit!.merge(sort: 'group_name', direction: params[:sort] == 'group_name' && params[:direction] == 'asc' ? 'desc' : 'asc')), class: "hover:text-gray-700 flex items-center space-x-1" do %>
135145
<span>Group</span>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Adds the integer `source` column to trained_messages.
class AddSourceToTrainedMessages < ActiveRecord::Migration[8.0]
2+
def change
3+
# NOT NULL with default 0; 0 is the `chat` key of TrainedMessage's `source` enum.
add_column :trained_messages, :source, :integer, default: 0, null: false
4+
end
5+
end

db/schema.rb

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/tasks/import.rake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ namespace :import do
3838
record.sender_chat_id = 0
3939
record.sender_user_name = "CSV Import"
4040
record.training_target = row["target"] || "message_content"
41+
record.source = :import
4142

4243
record.save!
4344

lib/tasks/telegram_data_collector.rake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ namespace :telegram do
188188
message_type: is_spam ? :maybe_spam : :maybe_ham,
189189
sender_user_name: user_name || "Telegram collector",
190190
training_target: training_target,
191-
sender_chat_id: user_id
191+
sender_chat_id: user_id,
192+
source: :import
192193
)
193194
end
194195
end

0 commit comments

Comments
 (0)