1
+ require "ffi"
2
+ require "json"
3
+
4
+ # Minimal TDLib FFI wrapper, tdlib-ruby is conflict with
5
+ # telegram-bot-ruby as they depends on dry-core
6
+ module TDJson
7
+ extend FFI ::Library
8
+ lib_name = "tdjson"
9
+ if FFI ::Platform . windows?
10
+ ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "#{ lib_name } .dll" )
11
+ elsif FFI ::Platform . mac?
12
+ ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "lib#{ lib_name } .dylib" )
13
+ else
14
+ ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "lib#{ lib_name } .so" )
15
+ end
16
+
17
+ attach_function :td_json_client_create , [ ] , :pointer
18
+ attach_function :td_json_client_send , [ :pointer , :string ] , :void
19
+ attach_function :td_json_client_receive , [ :pointer , :double ] , :string
20
+ attach_function :td_json_client_execute , [ :pointer , :string ] , :string
21
+ attach_function :td_json_client_destroy , [ :pointer ] , :void
22
+ end
23
+
24
+ class TDClient
25
+ def initialize
26
+ @client = TDJson . td_json_client_create
27
+ @request_queue = { }
28
+ end
29
+
30
+ def send_async ( query , &block )
31
+ request_id = SecureRandom . uuid
32
+ @request_queue [ request_id ] = block
33
+ query [ "@extra" ] = { request_id : request_id } . to_json
34
+ TDJson . td_json_client_send ( @client , JSON . dump ( query ) )
35
+ end
36
+
37
+ def receive ( timeout = 1.0 )
38
+ raw = TDJson . td_json_client_receive ( @client , timeout )
39
+ return unless raw
40
+
41
+ update = JSON . parse ( raw )
42
+ if update [ "@extra" ]
43
+ extra = JSON . parse ( update [ "@extra" ] )
44
+ if extra [ "request_id" ]
45
+ callback = @request_queue . delete ( extra [ "request_id" ] )
46
+ callback . call ( update ) if callback
47
+ end
48
+ end
49
+ update
50
+ end
51
+
52
+ def execute ( query )
53
+ raw = TDJson . td_json_client_execute ( @client , JSON . dump ( query ) )
54
+ raw && JSON . parse ( raw )
55
+ end
56
+
57
+ def send ( query )
58
+ TDJson . td_json_client_send ( @client , JSON . dump ( query ) )
59
+ end
60
+
61
+ def close
62
+ TDJson . td_json_client_destroy ( @client )
63
+ end
64
+
65
+ def get_chat ( chat_id )
66
+ execute ( {
67
+ "@type" => "getChat" ,
68
+ "chat_id" => chat_id
69
+ } )
70
+ end
71
+
72
+ def get_user ( user_id )
73
+ execute ( {
74
+ "@type" => "getUser" ,
75
+ "user_id" => user_id
76
+ } )
77
+ end
78
+ end
1
79
2
80
namespace :telegram do
3
81
desc "Starts the TDLib client to listen for telegram messages"
@@ -23,53 +101,6 @@ namespace :telegram do
23
101
next
24
102
end
25
103
26
- require "ffi"
27
- require "json"
28
-
29
- # Minimal TDLib FFI wrapper, tdlib-ruby is conflict with
30
- # telegram-bot-ruby as they depends on dry-core
31
- module TDJson
32
- extend FFI ::Library
33
- lib_name = "tdjson"
34
- if FFI ::Platform . windows?
35
- ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "#{ lib_name } .dll" )
36
- elsif FFI ::Platform . mac?
37
- ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "lib#{ lib_name } .dylib" )
38
- else
39
- ffi_lib File . join ( ENV . fetch ( "TDLIB_PATH" ) , "lib#{ lib_name } .so" )
40
- end
41
-
42
- attach_function :td_json_client_create , [ ] , :pointer
43
- attach_function :td_json_client_send , [ :pointer , :string ] , :void
44
- attach_function :td_json_client_receive , [ :pointer , :double ] , :string
45
- attach_function :td_json_client_execute , [ :pointer , :string ] , :string
46
- attach_function :td_json_client_destroy , [ :pointer ] , :void
47
- end
48
-
49
- class TDClient
50
- def initialize
51
- @client = TDJson . td_json_client_create
52
- end
53
-
54
- def send ( query )
55
- TDJson . td_json_client_send ( @client , JSON . dump ( query ) )
56
- end
57
-
58
- def receive ( timeout = 1.0 )
59
- raw = TDJson . td_json_client_receive ( @client , timeout )
60
- raw && JSON . parse ( raw )
61
- end
62
-
63
- def execute ( query )
64
- raw = TDJson . td_json_client_execute ( @client , JSON . dump ( query ) )
65
- raw && JSON . parse ( raw )
66
- end
67
-
68
- def close
69
- TDJson . td_json_client_destroy ( @client )
70
- end
71
- end
72
-
73
104
client = TDClient . new
74
105
75
106
# Set log level
@@ -131,34 +162,8 @@ namespace :telegram do
131
162
else
132
163
puts "Unhandled authorization state: #{ new_state } "
133
164
end
134
-
135
165
when "updateNewMessage"
136
- message = update [ "message" ]
137
- chat_id = message [ "chat_id" ]
138
- content = message [ "content" ]
139
-
140
- if content [ "@type" ] == "messageText"
141
- message_content = content [ "text" ] [ "text" ]
142
- process_message ( message_content )
143
- puts "Chat ID: #{ chat_id } | Text: #{ message_content } "
144
- else
145
- puts "Chat ID: #{ chat_id } | Type: #{ content [ '@type' ] } "
146
- end
147
- puts "----------------------"
148
-
149
- when "updateMessageContent"
150
- chat_id = update [ "chat_id" ]
151
- new_content = update [ "new_content" ]
152
-
153
- if new_content [ "@type" ] == "messageText"
154
- message_content = new_content [ "text" ] [ "text" ]
155
- process_message ( message_content )
156
- puts "Chat ID: #{ chat_id } | New Text: #{ message_content } "
157
- else
158
- puts "Chat ID: #{ chat_id } | New Type: #{ new_content [ '@type' ] } "
159
- end
160
- puts "----------------------"
161
-
166
+ handleUpdateNewMessage ( update , client )
162
167
else
163
168
# ignore other updates
164
169
end
@@ -172,9 +177,7 @@ namespace :telegram do
172
177
end
173
178
end
174
179
175
- def process_message ( message_content )
176
- group_id = GroupClassifierState ::TELEGRAM_DATA_COLLECTOR_GROUP_ID
177
- group_name = GroupClassifierState ::TELEGRAM_DATA_COLLECTOR_GROUP_NAME
180
+ def process_message ( message_content , group_id , group_name , user_id , user_name )
178
181
classifier = SpamClassifierService . new ( group_id , group_name )
179
182
message_hash = Digest ::SHA256 . hexdigest ( message_content . to_s )
180
183
existing_message = TrainedMessage . find_by ( message_hash : message_hash )
@@ -190,30 +193,51 @@ def process_message(message_content)
190
193
# reduces bias and improves accuracy
191
194
is_spam , spam_score , ham_score = classifier . classify ( message_content )
192
195
puts "classified result: #{ is_spam ? "maybe_spam" : "maybe_ham" } "
193
- if spam_count > ham_count
194
- # only interested in ham
195
- if !is_spam
196
- TrainedMessage . create! (
197
- group_id : group_id ,
198
- group_name : group_name ,
199
- message : message_content ,
200
- message_type : :maybe_ham ,
201
- sender_chat_id : 0 ,
202
- sender_user_name : "Telegram collector"
203
- )
204
- end
196
+ if ( spam_count > ham_count && !is_spam ) || ( spam_count <= ham_count && is_spam )
197
+ TrainedMessage . create! (
198
+ group_id : group_id ,
199
+ group_name : group_name ,
200
+ message : message_content ,
201
+ message_type : is_spam ? :maybe_spam : :maybe_ham ,
202
+ sender_user_name : user_name || "Telegram collector" ,
203
+ sender_chat_id : user_id
204
+ )
205
+ end
206
+ end
205
207
206
- else
207
- # interested in spam
208
- if is_spam
209
- TrainedMessage . create! (
210
- group_id : group_id ,
211
- group_name : group_name ,
212
- message : message_content ,
213
- message_type : :maybe_spam ,
214
- sender_chat_id : 0 ,
215
- sender_user_name : "Telegram collector"
216
- )
208
+ def handleUpdateNewMessage ( update , client )
209
+ message = update [ "message" ]
210
+ chat_id = message [ "chat_id" ]
211
+ sender_id = message [ "sender_id" ] [ "user_id" ] rescue nil
212
+ content = message [ "content" ]
213
+ user_name = "Unknown User"
214
+ message_content = content [ "text" ] [ "text" ] if content [ "@type" ] == "messageText"
215
+
216
+ # Send asynchronous requests for chat and user data
217
+ client . send_async ( { "@type" => "getChat" , "chat_id" => chat_id } ) do |chat_update |
218
+ chat = chat_update
219
+ group_name = chat [ "title" ] || "Unknown Group"
220
+
221
+ if sender_id
222
+ client . send_async ( { "@type" => "getUser" , "user_id" => sender_id } ) do |user_update |
223
+ user = user_update
224
+ user_name = user [ "first_name" ]
225
+ user_name += " " + user [ "last_name" ] if user [ "last_name" ]
226
+ user_name = user [ "username" ] if user [ "username" ]
227
+
228
+ unless message_content . blank?
229
+ process_message ( message_content , chat_id , group_name , sender_id , user_name )
230
+ puts "Group(#{ chat_id } ): #{ group_name } | User: #{ user_name } | Text: #{ message_content } "
231
+ puts "----------------------"
232
+ end
233
+ end
234
+ else
235
+ # Handle cases where there's no sender_id (e.g., channel posts)
236
+ unless message_content . blank?
237
+ process_message ( message_content , chat_id , group_name , sender_id , user_name )
238
+ puts "Group: #{ group_name } | User: #{ user_name } | Text: #{ message_content } "
239
+ puts "----------------------"
240
+ end
217
241
end
218
242
end
219
243
end
0 commit comments