Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 772ee93

Browse files
authored
Migrate sentiment to a TEI backend (#886)
1 parent bffe9df commit 772ee93

14 files changed

+287
-92
lines changed

config/settings.yml

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,22 +56,9 @@ discourse_ai:
5656
ai_sentiment_enabled:
5757
default: false
5858
client: true
59-
ai_sentiment_inference_service_api_endpoint:
60-
default: "https://sentiment-testing.demo-by-discourse.com"
61-
ai_sentiment_inference_service_api_endpoint_srv:
59+
ai_sentiment_model_configs:
6260
default: ""
63-
hidden: true
64-
ai_sentiment_inference_service_api_key:
65-
default: ""
66-
secret: true
67-
ai_sentiment_models:
68-
type: list
69-
list_type: compact
70-
default: "emotion|sentiment"
71-
allow_any: false
72-
choices:
73-
- sentiment
74-
- emotion
61+
json_schema: DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema
7562

7663
ai_nsfw_detection_enabled:
7764
default: false
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# frozen_string_literal: true
2+
# Rewrites stored sentiment and emotion classification results for the new
# classifier backend: every score is divided by 100 and `model_used` is changed
# from the legacy short name ('sentiment' / 'emotion') to the full model name.
class MigrateSentimentClassificationResultFormat < ActiveRecord::Migration[7.1]
  def up
    # jsonb_strip_nulls: a label missing from a legacy row would otherwise be
    # stored as a JSON null, and PostgreSQL refuses to cast a JSON null to
    # float, which is how the sentiment reports read these values.
    DB.exec(<<~SQL)
      UPDATE classification_results
      SET
        model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest',
        classification = jsonb_strip_nulls(jsonb_build_object(
          'neutral', (classification->>'neutral')::float / 100,
          'negative', (classification->>'negative')::float / 100,
          'positive', (classification->>'positive')::float / 100
        ))
      WHERE model_used = 'sentiment';

      UPDATE classification_results
      SET
        model_used = 'j-hartmann/emotion-english-distilroberta-base',
        classification = jsonb_strip_nulls(jsonb_build_object(
          'sadness', (classification->>'sadness')::float / 100,
          'surprise', (classification->>'surprise')::float / 100,
          'fear', (classification->>'fear')::float / 100,
          'anger', (classification->>'anger')::float / 100,
          'joy', (classification->>'joy')::float / 100,
          'disgust', (classification->>'disgust')::float / 100,
          'neutral', (classification->>'neutral')::float / 100
        ))
      WHERE model_used = 'emotion';
    SQL
  end

  # Migrated rows cannot be told apart from rows written afterwards under the
  # same model names, so the rewrite is not undone; fail loudly instead of
  # letting a rollback pass as a silent no-op.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

lib/inference/hugging_face_text_embeddings.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,29 @@ def rerank(content, candidates)
5858
JSON.parse(response.body, symbolize_names: true)
5959
end
6060

61+
# Posts +content+ to the classification endpoint described by +model_config+
# and returns the parsed JSON response with symbolized keys.
#
# @param content text to classify; sent as the `inputs` field with `truncate: true`
# @param model_config object responding to `endpoint` and `api_key`; an
#   endpoint starting with "srv://" is resolved through a DNS SRV lookup
# @return the parsed response body
# @raise [Net::HTTPBadResponse] when the response status is not 200
def classify(content, model_config)
  headers = { "Referer" => Discourse.base_url, "Content-Type" => "application/json" }

  # Only send credentials when a key is configured; a blank key would
  # otherwise produce a malformed "Bearer " Authorization header.
  api_key = model_config.api_key
  if api_key.present?
    headers["X-API-KEY"] = api_key
    headers["Authorization"] = "Bearer #{api_key}"
  end

  body = { inputs: content, truncate: true }.to_json

  api_endpoint = model_config.endpoint
  if api_endpoint.present? && api_endpoint.start_with?("srv://")
    service = DiscourseAi::Utils::DnsSrv.lookup(api_endpoint.delete_prefix("srv://"))
    api_endpoint = "https://#{service.target}:#{service.port}"
  end

  conn = Faraday.new { |f| f.adapter FinalDestination::FaradayAdapter }
  response = conn.post(api_endpoint, body, headers)

  if response.status != 200
    raise Net::HTTPBadResponse.new("Status: #{response.status}\n\n#{response.body}")
  end

  JSON.parse(response.body, symbolize_names: true)
end
83+
6184
def reranker_configured?
6285
SiteSetting.ai_hugging_face_tei_reranker_endpoint.present? ||
6386
SiteSetting.ai_hugging_face_tei_reranker_endpoint_srv.present?

lib/sentiment/entry_point.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ def inject_into(plugin)
1616

1717
plugin.add_report("overall_sentiment") do |report|
1818
report.modes = [:stacked_chart]
19-
threshold = 60
19+
threshold = 0.6
2020

2121
sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
2222
COUNT(
23-
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
23+
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::float > :threshold THEN 1 ELSE NULL END
2424
) AS #{sentiment}_count
2525
SQL
2626

@@ -39,7 +39,7 @@ def inject_into(plugin)
3939
WHERE
4040
t.archetype = 'regular' AND
4141
p.user_id > 0 AND
42-
cr.model_used = 'sentiment' AND
42+
cr.model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest' AND
4343
(p.created_at > :report_start AND p.created_at < :report_end)
4444
GROUP BY DATE_TRUNC('day', p.created_at)
4545
SQL
@@ -68,11 +68,11 @@ def inject_into(plugin)
6868

6969
plugin.add_report("post_emotion") do |report|
7070
report.modes = [:stacked_line_chart]
71-
threshold = 30
71+
threshold = 0.3
7272

7373
emotion_count_clause = Proc.new { |emotion| <<~SQL }
7474
COUNT(
75-
CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
75+
CASE WHEN (cr.classification::jsonb->'#{emotion}')::float > :threshold THEN 1 ELSE NULL END
7676
) AS #{emotion}_count
7777
SQL
7878

@@ -96,7 +96,7 @@ def inject_into(plugin)
9696
WHERE
9797
t.archetype = 'regular' AND
9898
p.user_id > 0 AND
99-
cr.model_used = 'emotion' AND
99+
cr.model_used = 'j-hartmann/emotion-english-distilroberta-base' AND
100100
(p.created_at > :report_start AND p.created_at < :report_end)
101101
GROUP BY DATE_TRUNC('day', p.created_at)
102102
SQL

lib/sentiment/sentiment_classification.rb

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ def type
77
:sentiment
88
end
99

10-
def available_models
11-
SiteSetting.ai_sentiment_models.split("|")
10+
def available_classifiers
11+
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values
1212
end
1313

1414
def can_classify?(target)
1515
content_of(target).present?
1616
end
1717

1818
# Returns the verdict map for this classification type: one entry per
# configured classifier, keyed by its model name, with every verdict false.
# The argument is ignored.
def get_verdicts(_)
  available_classifiers.to_h { |model| [model.model_name, false] }
end
@@ -30,21 +30,23 @@ def should_flag_based_on?(_verdicts)
3030
# Classifies the content of +target_to_classify+ with every configured
# classifier.
#
# @return [Hash] model name => result of request_with for that model
def request(target_to_classify)
  target_content = content_of(target_to_classify)

  available_classifiers.to_h do |model|
    [model.model_name, request_with(target_content, model)]
  end
end
3838

39+
# Converts a list of entries carrying :label and :score into a hash that maps
# each label to its score. When a label repeats, the last score wins.
#
# @param result [Array<Hash>] entries with :label and :score keys
# @return [Hash] label => score
def transform_result(result)
  result.to_h { |entry| [entry[:label], entry[:score]] }
end
44+
3945
private
4046

41-
def request_with(model, content)
42-
::DiscourseAi::Inference::DiscourseClassifier.perform!(
43-
"#{endpoint}/api/v1/classify",
44-
model,
45-
content,
46-
SiteSetting.ai_sentiment_inference_service_api_key,
47-
)
47+
# Runs +content+ through the classifier described by +model_config+ and
# returns the response reshaped by transform_result.
def request_with(content, model_config)
  transform_result(
    ::DiscourseAi::Inference::HuggingFaceTextEmbeddings.classify(content, model_config),
  )
end
4951

5052
def content_of(target_to_classify)
@@ -57,18 +59,6 @@ def content_of(target_to_classify)
5759

5860
Tokenizer::BertTokenizer.truncate(content, 512)
5961
end
60-
61-
def endpoint
62-
if SiteSetting.ai_sentiment_inference_service_api_endpoint_srv.present?
63-
service =
64-
DiscourseAi::Utils::DnsSrv.lookup(
65-
SiteSetting.ai_sentiment_inference_service_api_endpoint_srv,
66-
)
67-
"https://#{service.target}:#{service.port}"
68-
else
69-
SiteSetting.ai_sentiment_inference_service_api_endpoint
70-
end
71-
end
7262
end
7363
end
7464
end
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
  module Sentiment
    # JSON schema for the `ai_sentiment_model_configs` site setting, plus an
    # accessor that parses the setting's current value.
    class SentimentSiteSettingJsonSchema
      # Schema of the setting: an array of objects, each requiring the string
      # fields model_name, endpoint and api_key. Memoized at class level.
      def self.schema
        @schema ||= {
          type: "array",
          items: {
            type: "object",
            format: "table",
            title: "model",
            properties: {
              model_name: {
                type: "string",
              },
              endpoint: {
                type: "string",
              },
              api_key: {
                type: "string",
              },
            },
            required: %w[model_name endpoint api_key],
          },
        }
      end

      # Parses the setting into objects exposing the configured fields as
      # methods (JSON objects become OpenStruct instances).
      #
      # Returns an empty array when the setting is blank: a blank string is
      # not valid JSON, so parsing it would raise JSON::ParserError.
      def self.values
        raw_configs = SiteSetting.ai_sentiment_model_configs
        return [] if raw_configs.blank?

        JSON.parse(raw_configs, object_class: OpenStruct)
      end
    end
  end
end
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# frozen_string_literal: true
2+
3+
require "rails_helper"
4+
require Rails.root.join(
5+
"plugins/discourse-ai/db/post_migrate/20241031041242_migrate_sentiment_classification_result_format",
6+
)
7+
8+
RSpec.describe MigrateSentimentClassificationResultFormat do
9+
let(:connection) { ActiveRecord::Base.connection }
10+
11+
before { connection.execute(<<~SQL) }
12+
INSERT INTO classification_results (model_used, classification, created_at, updated_at) VALUES
13+
('sentiment', '{"neutral": 65, "negative": 20, "positive": 14}', NOW(), NOW()),
14+
('emotion', '{"sadness": 10, "surprise": 15, "fear": 5, "anger": 20, "joy": 30, "disgust": 8, "neutral": 10}', NOW(), NOW());
15+
SQL
16+
17+
after { connection.execute("DELETE FROM classification_results") }
18+
19+
describe "#up" do
20+
before { described_class.new.up }
21+
22+
it "migrates sentiment classifications correctly" do
23+
sentiment_result = connection.execute(<<~SQL).first
24+
SELECT * FROM classification_results
25+
WHERE model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest';
26+
SQL
27+
28+
expected_sentiment = { "neutral" => 0.65, "negative" => 0.20, "positive" => 0.14 }
29+
30+
expect(JSON.parse(sentiment_result["classification"])).to eq(expected_sentiment)
31+
end
32+
33+
it "migrates emotion classifications correctly" do
34+
emotion_result = connection.execute(<<~SQL).first
35+
SELECT * FROM classification_results
36+
WHERE model_used = 'j-hartmann/emotion-english-distilroberta-base';
37+
SQL
38+
39+
expected_emotion = {
40+
"sadness" => 0.10,
41+
"surprise" => 0.15,
42+
"fear" => 0.05,
43+
"anger" => 0.20,
44+
"joy" => 0.30,
45+
"disgust" => 0.08,
46+
"neutral" => 0.10,
47+
}
48+
49+
expect(JSON.parse(emotion_result["classification"])).to eq(expected_emotion)
50+
end
51+
end
52+
end

spec/fabricators/classification_result_fabricator.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
end
77

88
Fabricator(:sentiment_classification, from: :classification_result) do
  model_used "cardiffnlp/twitter-roberta-base-sentiment-latest"
  # Scores use the 0-1 scale: the former 72 / 23 / 4 fixture values divided by 100.
  classification { { negative: 0.72, neutral: 0.23, positive: 0.04 } }
end
1212

1313
Fabricator(:emotion_classification, from: :classification_result) do
14-
model_used "emotion"
15-
classification { { negative: 72, neutral: 23, positive: 4 } }
14+
model_used "j-hartmann/emotion-english-distilroberta-base"
15+
classification do
16+
{ sadness: 0.72, surprise: 0.23, fear: 0.4, anger: 0.87, joy: 0.22, disgust: 0.70 }
17+
end
1618
end

spec/lib/modules/sentiment/entry_point_spec.rb

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,19 @@
5353
end
5454

5555
describe "custom reports" do
56-
before { SiteSetting.ai_sentiment_inference_service_api_endpoint = "http://test.com" }
56+
before do
57+
SiteSetting.ai_sentiment_model_configs =
58+
"[{\"model_name\":\"SamLowe/roberta-base-go_emotions\",\"endpoint\":\"http://samlowe-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"j-hartmann/emotion-english-distilroberta-base\",\"endpoint\":\"http://jhartmann-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"cardiffnlp/twitter-roberta-base-sentiment-latest\",\"endpoint\":\"http://cardiffnlp-sentiment.com\",\"api_key\":\"123\"}]"
59+
end
5760

5861
fab!(:pm) { Fabricate(:private_message_post) }
5962

6063
fab!(:post_1) { Fabricate(:post) }
6164
fab!(:post_2) { Fabricate(:post) }
6265

6366
describe "overall_sentiment report" do
64-
let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
65-
let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
67+
let(:positive_classification) { { negative: 0.2, neutral: 0.3, positive: 0.7 } }
68+
let(:negative_classification) { { negative: 0.65, neutral: 0.2, positive: 0.1 } }
6669

6770
def sentiment_classification(post, classification)
6871
Fabricate(:sentiment_classification, target: post, classification: classification)
@@ -84,12 +87,28 @@ def sentiment_classification(post, classification)
8487

8588
describe "post_emotion report" do
8689
let(:emotion_1) do
87-
{ sadness: 49, surprise: 23, neutral: 6, fear: 34, anger: 87, joy: 22, disgust: 70 }
90+
{
91+
sadness: 0.49,
92+
surprise: 0.23,
93+
neutral: 0.6,
94+
fear: 0.34,
95+
anger: 0.87,
96+
joy: 0.22,
97+
disgust: 0.70,
98+
}
8899
end
89100
let(:emotion_2) do
90-
{ sadness: 19, surprise: 63, neutral: 45, fear: 44, anger: 27, joy: 62, disgust: 30 }
101+
{
102+
sadness: 0.19,
103+
surprise: 0.63,
104+
neutral: 0.45,
105+
fear: 0.44,
106+
anger: 0.27,
107+
joy: 0.62,
108+
disgust: 0.30,
109+
}
91110
end
92-
let(:model_used) { "emotion" }
111+
let(:model_used) { "j-hartmann/emotion-english-distilroberta-base" }
93112

94113
def emotion_classification(post, classification)
95114
Fabricate(
@@ -106,7 +125,7 @@ def strip_emoji_and_downcase(str)
106125
end
107126

108127
it "calculate averages using only public posts" do
109-
threshold = 30
128+
threshold = 0.30
110129

111130
emotion_classification(post_1, emotion_1)
112131
emotion_classification(post_2, emotion_2)

spec/lib/modules/sentiment/jobs/regular/post_sentiment_analysis_spec.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
before do
1010
SiteSetting.ai_sentiment_enabled = true
11-
SiteSetting.ai_sentiment_inference_service_api_endpoint = "http://test.com"
11+
SiteSetting.ai_sentiment_model_configs =
12+
"[{\"model_name\":\"SamLowe/roberta-base-go_emotions\",\"endpoint\":\"http://samlowe-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"j-hartmann/emotion-english-distilroberta-base\",\"endpoint\":\"http://jhartmann-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"cardiffnlp/twitter-roberta-base-sentiment-latest\",\"endpoint\":\"http://cardiffnlp-sentiment.com\",\"api_key\":\"123\"}]"
1213
end
1314

1415
describe "scenarios where we return early without doing anything" do
@@ -42,7 +43,8 @@
4243
end
4344

4445
it "successfully classifies the post" do
45-
expected_analysis = SiteSetting.ai_sentiment_models.split("|").length
46+
expected_analysis =
47+
DiscourseAi::Sentiment::SentimentClassification.new.available_classifiers.length
4648
SentimentInferenceStubs.stub_classification(post)
4749

4850
subject.execute({ post_id: post.id })

0 commit comments

Comments
 (0)