33module DiscourseAi
44 module Sentiment
55 class PostClassification
# Builds the relation of posts that still need sentiment classification.
#
# A post is selected when it is a regular, non-deleted post in a non-deleted,
# non-private-message topic, and either has no sentiment classification rows
# or was classified by a set of models that differs from the configured set.
#
# @param from_post_id [Integer, String, nil] when present, only posts whose
#   id is >= this value are considered
# @param max_age_days [Integer, String, nil] when present, only posts created
#   after `current_date` minus this many days are considered
# @return [ActiveRecord::Relation] posts grouped by `posts.id`
def self.backfill_query(from_post_id: nil, max_age_days: nil)
  # Lower-cased and sorted so the list lines up with the lower-cased, ordered
  # array_agg it is compared against in the HAVING clause below.
  available_classifier_names =
    DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema
      .values
      .map { |mc| mc.model_name.downcase }
      .sort

  # Without any configured classifier there is nothing to classify with, so
  # no post can be pending backfill.
  return Post.none if available_classifier_names.empty?

  base_query =
    Post
      .includes(:sentiment_classifications)
      .joins("INNER JOIN topics ON topics.id = posts.topic_id")
      .where(post_type: Post.types[:regular])
      .where.not(topics: { archetype: Archetype.private_message })
      .where(posts: { deleted_at: nil })
      .where(topics: { deleted_at: nil })
      .joins(<<~SQL)
        LEFT JOIN classification_results crs
          ON crs.target_id = posts.id
          AND crs.target_type = 'Post'
          AND crs.classification_type = 'sentiment'
      SQL
      .group("posts.id")
      .having(<<~SQL, available_classifier_names)
        COUNT(crs.model_used) = 0
        OR array_agg(
          DISTINCT LOWER(crs.model_used) ORDER BY LOWER(crs.model_used)
        )::text[] IS DISTINCT FROM array[?]
      SQL

  base_query = base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?

  if max_age_days.present?
    # Bind the day count instead of interpolating it into the SQL string.
    base_query =
      base_query.where(
        "posts.created_at > current_date - INTERVAL '1 day' * ?",
        max_age_days.to_i,
      )
  end

  base_query
end
46+
647 def bulk_classify! ( relation )
748 http_pool_size = 100
849 pool =
@@ -13,6 +54,7 @@ def bulk_classify!(relation)
1354 )
1455
1556 available_classifiers = classifiers
57+ return if available_classifiers . blank?
1658 base_url = Discourse . base_url
1759
1860 promised_classifications =
@@ -25,9 +67,13 @@ def bulk_classify!(relation)
2567 . fulfilled_future ( { target : record , text : text } , pool )
2668 . then_on ( pool ) do |w_text |
2769 results = Concurrent ::Hash . new
70+ already_classified = w_text [ :target ] . sentiment_classifications . map ( &:model_used )
71+
72+ classifiers_for_target =
73+ available_classifiers . reject { |ac | already_classified . include? ( ac . model_name ) }
2874
2975 promised_target_results =
30- available_classifiers . map do |c |
76+ classifiers_for_target . map do |c |
3177 Concurrent ::Promises . future_on ( pool ) do
3278 results [ c . model_name ] = request_with ( w_text [ :text ] , c , base_url )
3379 end
@@ -52,12 +98,17 @@ def bulk_classify!(relation)
5298
5399 def classify! ( target )
54100 return if target . blank?
101+ return if classifiers . blank?
55102
56103 to_classify = prepare_text ( target )
57104 return if to_classify . blank?
58105
106+ already_classified = target . sentiment_classifications . map ( &:model_used )
107+ classifiers_for_target =
108+ classifiers . reject { |ac | already_classified . include? ( ac . model_name ) }
109+
59110 results =
60- classifiers . reduce ( { } ) do |memo , model |
111+ classifiers_for_target . reduce ( { } ) do |memo , model |
61112 memo [ model . model_name ] = request_with ( to_classify , model )
62113 memo
63114 end
@@ -69,6 +120,10 @@ def classifiers
69120 DiscourseAi ::Sentiment ::SentimentSiteSettingJsonSchema . values
70121 end
71122
# Reports whether any sentiment classifier is available.
#
# @return [Boolean] false when `classifiers` is blank, true otherwise
def has_classifiers?
  !classifiers.blank?
end
126+
72127 private
73128
74129 def prepare_text ( target )
0 commit comments