Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 938d4c0

Browse files
authored
FIX: More resilient sentiment backfill query (#998)
1 parent e7c2cd8 commit 938d4c0

File tree

1 file changed

+37
-37
lines changed

1 file changed

+37
-37
lines changed

lib/sentiment/post_classification.rb

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -5,43 +5,43 @@ module Sentiment
55
class PostClassification
66
# Builds the relation of posts that still need sentiment classification.
#
# Reading note: this is a rendered diff. A bare number line such as "8-" or
# "8+" is the diff gutter for the code line that follows it; "-" marks a line
# removed by the commit, "+" a line added, and a doubled number (e.g. "4545")
# an unchanged context line.
#
# Both versions restrict to regular posts whose topic is not a private message
# and where neither the post nor its topic is deleted.
#
# from_post_id - optional; when present only posts with id >= from_post_id.to_i
#                are considered.
# max_age_days - optional; when present only posts created after
#                current_date minus that many days are considered.
#
# Returns a Post relation (added version: Post.from over a UNION subquery
# aliased as "posts").
def self.backfill_query(from_post_id: nil, max_age_days: nil)
77
available_classifier_names =
8-
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema
9-
.values
10-
.map { |mc| mc.model_name.downcase }
11-
.sort
12-
# Removed version: a single grouped query. A post was selected when it had no
# sentiment rows at all, or when the sorted, lower-cased set of models that
# classified it differed from the sorted, lower-cased configured names.
13-
base_query =
14-
Post
15-
.includes(:sentiment_classifications)
16-
.joins("INNER JOIN topics ON topics.id = posts.topic_id")
17-
.where(post_type: Post.types[:regular])
18-
.where.not(topics: { archetype: Archetype.private_message })
19-
.where(posts: { deleted_at: nil })
20-
.where(topics: { deleted_at: nil })
21-
.joins(<<~SQL)
22-
LEFT JOIN classification_results crs
23-
ON crs.target_id = posts.id
24-
AND crs.target_type = 'Post'
25-
AND crs.classification_type = 'sentiment'
26-
SQL
27-
.group("posts.id")
28-
.having(<<~SQL, available_classifier_names)
29-
COUNT(crs.model_used) = 0
30-
OR array_agg(
31-
DISTINCT LOWER(crs.model_used) ORDER BY LOWER(crs.model_used)
32-
)::text[] IS DISTINCT FROM array[?]
33-
SQL
34-
35-
base_query = base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?
36-
37-
if max_age_days.present?
38-
base_query =
39-
base_query.where(
40-
"posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
41-
)
42-
end
43-
44-
base_query
# Added version starts here. Unlike the removed version, model names are used
# as-is (no downcase, no sort).
8+
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values.map { _1.model_name }
9+
# One query per configured classifier: the LEFT JOIN matches only sentiment
# rows produced by that classifier, and "crs.id IS NULL" keeps the posts that
# have no such row.
10+
queries =
11+
available_classifier_names.map do |classifier_name|
12+
base_query =
13+
Post
14+
.includes(:sentiment_classifications)
15+
.joins("INNER JOIN topics ON topics.id = posts.topic_id")
16+
.where(post_type: Post.types[:regular])
17+
.where.not(topics: { archetype: Archetype.private_message })
18+
.where(posts: { deleted_at: nil })
19+
.where(topics: { deleted_at: nil })
# NOTE(review): classifier_name is interpolated straight into the SQL text
# below rather than quoted or bound; this presumably relies on the site
# setting values being trusted -- confirm, or quote the value.
20+
.joins(<<~SQL)
21+
LEFT JOIN classification_results crs
22+
ON crs.target_id = posts.id
23+
AND crs.target_type = 'Post'
24+
AND crs.classification_type = 'sentiment'
25+
AND crs.model_used = '#{classifier_name}'
26+
SQL
27+
.where("crs.id IS NULL")
28+
29+
base_query =
30+
base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?
31+
32+
# to_i forces an integer, so the interpolated interval cannot carry arbitrary
# SQL text.
if max_age_days.present?
33+
base_query =
34+
base_query.where(
35+
"posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
36+
)
37+
end
38+
39+
base_query
40+
end
41+
# The per-classifier queries are rendered to SQL and combined with UNION (not
# UNION ALL), so a post missing results for several classifiers is returned
# once.
# NOTE(review): with no configured classifiers, queries is empty and the
# subquery text becomes "() as posts" -- likely invalid SQL; confirm whether
# callers guarantee at least one classifier.
42+
unioned_queries = queries.map(&:to_sql).join(" UNION ")
43+
44+
Post.from(Arel.sql("(#{unioned_queries}) as posts"))
4545
end
4646

4747
def bulk_classify!(relation)

0 commit comments

Comments
 (0)