33module DiscourseAi
44 module Sentiment
55 class PostClassification
# Builds the relation of posts that still need sentiment classification.
#
# A post is selected when it is a regular, non-deleted post inside a
# non-deleted topic that is not a private message, and either:
#   * it has no 'sentiment' rows in classification_results, or
#   * the sorted, distinct set of models that classified it differs from
#     the sorted list of currently configured classifier model names.
#
# @param from_post_id [Integer, String, nil] when present, restricts the
#   result to posts whose id is >= this value (coerced with to_i).
# @param max_age_days [Integer, String, nil] when present, restricts the
#   result to posts created after current_date minus this many days.
# @return [ActiveRecord::Relation] posts grouped by posts.id; an empty
#   relation when no sentiment classifiers are configured.
def self.backfill_query(from_post_id: nil, max_age_days: nil)
  available_classifier_names =
    DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values.map(&:model_name).sort

  # Without configured classifiers there is nothing to backfill. ActiveRecord
  # binds an empty array as NULL, so the HAVING clause below would compare
  # against array[NULL] and select every eligible post.
  return Post.none if available_classifier_names.blank?

  base_query =
    Post
      .includes(:sentiment_classifications)
      .joins("INNER JOIN topics ON topics.id = posts.topic_id")
      .where(post_type: Post.types[:regular])
      .where.not(topics: { archetype: Archetype.private_message })
      .where(posts: { deleted_at: nil })
      .where(topics: { deleted_at: nil })
      .joins(<<~SQL)
        LEFT JOIN classification_results crs
          ON crs.target_id = posts.id
          AND crs.target_type = 'Post'
          AND crs.classification_type = 'sentiment'
      SQL
      .group("posts.id")
      .having(<<~SQL, available_classifier_names)
        COUNT(crs.model_used) = 0
        OR array_agg(DISTINCT crs.model_used ORDER BY crs.model_used)::text[] IS DISTINCT FROM array[?]
      SQL

  base_query = base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?

  if max_age_days.present?
    # to_i guarantees that only an integer literal is interpolated into the SQL.
    base_query =
      base_query.where(
        "posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
      )
  end

  base_query
end
41+
642 def bulk_classify! ( relation )
743 http_pool_size = 100
844 pool =
@@ -13,6 +49,7 @@ def bulk_classify!(relation)
1349 )
1450
1551 available_classifiers = classifiers
52+ return if available_classifiers . blank?
1653 base_url = Discourse . base_url
1754
1855 promised_classifications =
@@ -25,9 +62,13 @@ def bulk_classify!(relation)
2562 . fulfilled_future ( { target : record , text : text } , pool )
2663 . then_on ( pool ) do |w_text |
2764 results = Concurrent ::Hash . new
65+ already_classified = w_text [ :target ] . sentiment_classifications . map ( &:model_used )
66+
67+ classifiers_for_target =
68+ available_classifiers . reject { |ac | already_classified . include? ( ac . model_name ) }
2869
2970 promised_target_results =
30- available_classifiers . map do |c |
71+ classifiers_for_target . map do |c |
3172 Concurrent ::Promises . future_on ( pool ) do
3273 results [ c . model_name ] = request_with ( w_text [ :text ] , c , base_url )
3374 end
@@ -52,12 +93,17 @@ def bulk_classify!(relation)
5293
5394 def classify! ( target )
5495 return if target . blank?
96+ return if classifiers . blank?
5597
5698 to_classify = prepare_text ( target )
5799 return if to_classify . blank?
58100
101+ already_classified = target . sentiment_classifications . map ( &:model_used )
102+ classifiers_for_target =
103+ classifiers . reject { |ac | already_classified . include? ( ac . model_name ) }
104+
59105 results =
60- classifiers . reduce ( { } ) do |memo , model |
106+ classifiers_for_target . reduce ( { } ) do |memo , model |
61107 memo [ model . model_name ] = request_with ( to_classify , model )
62108 memo
63109 end
@@ -69,6 +115,10 @@ def classifiers
69115 DiscourseAi ::Sentiment ::SentimentSiteSettingJsonSchema . values
70116 end
71117
# Reports whether the `classifiers` list has anything in it.
#
# @return [Boolean] false when `classifiers` is blank, true otherwise.
def has_classifiers?
  !classifiers.blank?
end
121+
72122 private
73123
74124 def prepare_text ( target )
0 commit comments