@@ -5,43 +5,43 @@ module Sentiment
55 class PostClassification
# Builds a relation of posts that are still missing a sentiment
# classification result for at least one configured classifier model.
#
# For each configured classifier an anti-join query is built (LEFT JOIN on
# classification_results restricted to that model, keeping rows where no
# result exists). The per-classifier queries are combined with UNION, so a
# post missing results for several classifiers appears only once.
#
# Only regular posts in non-private-message topics are considered, and
# soft-deleted posts/topics are excluded.
#
# @param from_post_id [Integer, String, nil] when present, restricts to
#   posts whose id is >= this value (coerced with +to_i+).
# @param max_age_days [Integer, String, nil] when present, restricts to
#   posts created after +current_date - N days+ (coerced with +to_i+).
# @return [ActiveRecord::Relation] a Post relation selecting from the
#   unioned subquery, or an empty relation when no classifier is configured.
def self.backfill_query(from_post_id: nil, max_age_days: nil)
  available_classifier_names =
    DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values.map { _1.model_name }

  # Without classifiers the UNION below would be an empty string and
  # "() as posts" is not valid SQL, so short-circuit to an empty relation.
  return Post.none if available_classifier_names.empty?

  # Filters shared by every per-classifier query; built once instead of
  # once per classifier.
  # NOTE(review): `includes` appears to have no effect on the SQL emitted
  # by `to_sql` below, so it is presumably not carried over to the returned
  # relation — confirm whether callers rely on eager loading.
  candidate_posts =
    Post
      .includes(:sentiment_classifications)
      .joins("INNER JOIN topics ON topics.id = posts.topic_id")
      .where(post_type: Post.types[:regular])
      .where.not(topics: { archetype: Archetype.private_message })
      .where(posts: { deleted_at: nil })
      .where(topics: { deleted_at: nil })

  if from_post_id.present?
    candidate_posts = candidate_posts.where("posts.id >= ?", from_post_id.to_i)
  end

  if max_age_days.present?
    # Safe to interpolate: the value is forced to an Integer via to_i.
    candidate_posts =
      candidate_posts.where(
        "posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
      )
  end

  queries =
    available_classifier_names.map do |classifier_name|
      # Model names come from a site setting; quote them through the
      # connection adapter rather than interpolating the raw value.
      quoted_classifier_name = Post.connection.quote(classifier_name)

      candidate_posts.joins(<<~SQL).where("crs.id IS NULL")
        LEFT JOIN classification_results crs
          ON crs.target_id = posts.id
          AND crs.target_type = 'Post'
          AND crs.classification_type = 'sentiment'
          AND crs.model_used = #{quoted_classifier_name}
      SQL
    end

  unioned_queries = queries.map(&:to_sql).join(" UNION ")

  Post.from(Arel.sql("(#{unioned_queries}) as posts"))
end
4646
4747 def bulk_classify! ( relation )
0 commit comments