Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 2 additions & 15 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,9 @@ discourse_ai:
ai_sentiment_enabled:
default: false
client: true
ai_sentiment_inference_service_api_endpoint:
default: "https://sentiment-testing.demo-by-discourse.com"
ai_sentiment_inference_service_api_endpoint_srv:
ai_sentiment_model_configs:
default: ""
hidden: true
ai_sentiment_inference_service_api_key:
default: ""
secret: true
ai_sentiment_models:
type: list
list_type: compact
default: "emotion|sentiment"
allow_any: false
choices:
- sentiment
- emotion
json_schema: DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema

ai_nsfw_detection_enabled:
default: false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# frozen_string_literal: true
# Converts legacy sentiment/emotion rows in `classification_results` to the
# new storage format:
#
# * `model_used` is rewritten from the short legacy names ('sentiment',
#   'emotion') to the full model identifiers.
# * every score in `classification` is divided by 100.
class MigrateSentimentClassificationResultFormat < ActiveRecord::Migration[7.1]
  def up
    DB.exec(<<~SQL)
      UPDATE classification_results
      SET
        model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest',
        classification = jsonb_build_object(
          'neutral', (classification->>'neutral')::float / 100,
          'negative', (classification->>'negative')::float / 100,
          'positive', (classification->>'positive')::float / 100
        )
      WHERE model_used = 'sentiment';

      UPDATE classification_results
      SET
        model_used = 'j-hartmann/emotion-english-distilroberta-base',
        classification = jsonb_build_object(
          'sadness', (classification->>'sadness')::float / 100,
          'surprise', (classification->>'surprise')::float / 100,
          'fear', (classification->>'fear')::float / 100,
          'anger', (classification->>'anger')::float / 100,
          'joy', (classification->>'joy')::float / 100,
          'disgust', (classification->>'disgust')::float / 100,
          'neutral', (classification->>'neutral')::float / 100
        )
      WHERE model_used = 'emotion';
    SQL
  end

  # No reverse transformation is provided. Fail loudly on rollback instead of
  # reporting success while the rows remain in the new format.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end
23 changes: 23 additions & 0 deletions lib/inference/hugging_face_text_embeddings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,29 @@ def rerank(content, candidates)
JSON.parse(response.body, symbolize_names: true)
end

# Posts `content` to the classification endpoint described by `model_config`
# and returns the parsed JSON response body with symbolized keys.
#
# `model_config` must respond to `api_key` and `endpoint`. An endpoint that
# starts with "srv://" is resolved through DiscourseAi::Utils::DnsSrv and
# replaced by "https://<target>:<port>".
#
# Raises Net::HTTPBadResponse when the response status is anything but 200.
def classify(content, model_config)
  api_key = model_config.api_key
  # The same key is sent in both header styles.
  request_headers = {
    "Referer" => Discourse.base_url,
    "Content-Type" => "application/json",
    "X-API-KEY" => api_key,
    "Authorization" => "Bearer #{api_key}",
  }

  payload = { inputs: content, truncate: true }.to_json

  target_url = model_config.endpoint
  if target_url.present? && target_url.start_with?("srv://")
    srv_record = DiscourseAi::Utils::DnsSrv.lookup(target_url.delete_prefix("srv://"))
    target_url = "https://#{srv_record.target}:#{srv_record.port}"
  end

  response =
    Faraday
      .new { |builder| builder.adapter FinalDestination::FaradayAdapter }
      .post(target_url, payload, request_headers)

  unless response.status == 200
    raise Net::HTTPBadResponse, "Status: #{response.status}\n\n#{response.body}"
  end

  JSON.parse(response.body, symbolize_names: true)
end

def reranker_configured?
SiteSetting.ai_hugging_face_tei_reranker_endpoint.present? ||
SiteSetting.ai_hugging_face_tei_reranker_endpoint_srv.present?
Expand Down
12 changes: 6 additions & 6 deletions lib/sentiment/entry_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ def inject_into(plugin)

plugin.add_report("overall_sentiment") do |report|
report.modes = [:stacked_chart]
threshold = 60
threshold = 0.6

sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::float > :threshold THEN 1 ELSE NULL END
) AS #{sentiment}_count
SQL

Expand All @@ -39,7 +39,7 @@ def inject_into(plugin)
WHERE
t.archetype = 'regular' AND
p.user_id > 0 AND
cr.model_used = 'sentiment' AND
cr.model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest' AND
(p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY DATE_TRUNC('day', p.created_at)
SQL
Expand Down Expand Up @@ -68,11 +68,11 @@ def inject_into(plugin)

plugin.add_report("post_emotion") do |report|
report.modes = [:stacked_line_chart]
threshold = 30
threshold = 0.3

emotion_count_clause = Proc.new { |emotion| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
CASE WHEN (cr.classification::jsonb->'#{emotion}')::float > :threshold THEN 1 ELSE NULL END
) AS #{emotion}_count
SQL

Expand All @@ -96,7 +96,7 @@ def inject_into(plugin)
WHERE
t.archetype = 'regular' AND
p.user_id > 0 AND
cr.model_used = 'emotion' AND
cr.model_used = 'j-hartmann/emotion-english-distilroberta-base' AND
(p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY DATE_TRUNC('day', p.created_at)
SQL
Expand Down
40 changes: 15 additions & 25 deletions lib/sentiment/sentiment_classification.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ def type
:sentiment
end

def available_models
SiteSetting.ai_sentiment_models.split("|")
def available_classifiers
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values
end

def can_classify?(target)
content_of(target).present?
end

def get_verdicts(_)
available_models.reduce({}) do |memo, model|
memo[model] = false
available_classifiers.reduce({}) do |memo, model|
memo[model.model_name] = false
memo
end
end
Expand All @@ -30,21 +30,23 @@ def should_flag_based_on?(_verdicts)
def request(target_to_classify)
target_content = content_of(target_to_classify)

available_models.reduce({}) do |memo, model|
memo[model] = request_with(model, target_content)
available_classifiers.reduce({}) do |memo, model|
memo[model.model_name] = request_with(target_content, model)
memo
end
end

# Collapses a list of entries carrying `:label` and `:score` keys into a
# single hash mapping each label to its score. When a label occurs more than
# once, the last entry wins.
def transform_result(result)
  result.each_with_object({}) { |entry, scores| scores[entry[:label]] = entry[:score] }
end

private

def request_with(model, content)
::DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{endpoint}/api/v1/classify",
model,
content,
SiteSetting.ai_sentiment_inference_service_api_key,
)
def request_with(content, model_config)
result = ::DiscourseAi::Inference::HuggingFaceTextEmbeddings.classify(content, model_config)
transform_result(result)
end

def content_of(target_to_classify)
Expand All @@ -57,18 +59,6 @@ def content_of(target_to_classify)

Tokenizer::BertTokenizer.truncate(content, 512)
end

def endpoint
if SiteSetting.ai_sentiment_inference_service_api_endpoint_srv.present?
service =
DiscourseAi::Utils::DnsSrv.lookup(
SiteSetting.ai_sentiment_inference_service_api_endpoint_srv,
)
"https://#{service.target}:#{service.port}"
else
SiteSetting.ai_sentiment_inference_service_api_endpoint
end
end
end
end
end
34 changes: 34 additions & 0 deletions lib/sentiment/sentiment_site_setting_json_schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module DiscourseAi
  module Sentiment
    # JSON schema for, and parsed access to, the `ai_sentiment_model_configs`
    # site setting: a JSON array of objects, each with `model_name`,
    # `endpoint` and `api_key` string properties.
    class SentimentSiteSettingJsonSchema
      # Returns the (memoized) schema hash describing the setting's value.
      def self.schema
        @schema ||= {
          type: "array",
          items: {
            type: "object",
            format: "table",
            title: "model",
            properties: {
              model_name: {
                type: "string",
              },
              endpoint: {
                type: "string",
              },
              api_key: {
                type: "string",
              },
            },
            required: %w[model_name endpoint api_key],
          },
        }
      end

      # Parses the setting into an array of OpenStruct objects, one per
      # configured model.
      #
      # Returns an empty array when the setting is blank. The setting's
      # default is an empty string, which JSON.parse rejects with
      # JSON::ParserError, so an unconfigured site must not reach the parser.
      def self.values
        raw = SiteSetting.ai_sentiment_model_configs.to_s
        return [] if raw.strip.empty?

        JSON.parse(raw, object_class: OpenStruct)
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

require "rails_helper"
require Rails.root.join(
"plugins/discourse-ai/db/post_migrate/20241031041242_migrate_sentiment_classification_result_format",
)

RSpec.describe MigrateSentimentClassificationResultFormat do
  let(:connection) { ActiveRecord::Base.connection }

  # Seed one legacy-format row per model (short model name, scores as
  # integers) for the migration to convert.
  before do
    connection.execute(<<~SQL)
      INSERT INTO classification_results (model_used, classification, created_at, updated_at) VALUES
      ('sentiment', '{"neutral": 65, "negative": 20, "positive": 14}', NOW(), NOW()),
      ('emotion', '{"sadness": 10, "surprise": 15, "fear": 5, "anger": 20, "joy": 30, "disgust": 8, "neutral": 10}', NOW(), NOW());
    SQL
  end

  after { connection.execute("DELETE FROM classification_results") }

  describe "#up" do
    before { described_class.new.up }

    it "migrates sentiment classifications correctly" do
      migrated_row = connection.execute(<<~SQL).first
        SELECT * FROM classification_results
        WHERE model_used = 'cardiffnlp/twitter-roberta-base-sentiment-latest';
      SQL

      stored_scores = JSON.parse(migrated_row["classification"])

      expect(stored_scores).to eq({ "neutral" => 0.65, "negative" => 0.20, "positive" => 0.14 })
    end

    it "migrates emotion classifications correctly" do
      migrated_row = connection.execute(<<~SQL).first
        SELECT * FROM classification_results
        WHERE model_used = 'j-hartmann/emotion-english-distilroberta-base';
      SQL

      stored_scores = JSON.parse(migrated_row["classification"])

      expect(stored_scores).to eq(
        {
          "sadness" => 0.10,
          "surprise" => 0.15,
          "fear" => 0.05,
          "anger" => 0.20,
          "joy" => 0.30,
          "disgust" => 0.08,
          "neutral" => 0.10,
        },
      )
    end
  end
end
10 changes: 6 additions & 4 deletions spec/fabricators/classification_result_fabricator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
end

Fabricator(:sentiment_classification, from: :classification_result) do
model_used "sentiment"
classification { { negative: 72, neutral: 23, positive: 4 } }
model_used "cardiffnlp/twitter-roberta-base-sentiment-latest"
classification { { negative: 0.72, neutral: 0.23, positive: 0.4 } }
end

Fabricator(:emotion_classification, from: :classification_result) do
model_used "emotion"
classification { { negative: 72, neutral: 23, positive: 4 } }
model_used "j-hartmann/emotion-english-distilroberta-base"
classification do
{ sadness: 0.72, surprise: 0.23, fear: 0.4, anger: 0.87, joy: 0.22, disgust: 0.70 }
end
end
33 changes: 26 additions & 7 deletions spec/lib/modules/sentiment/entry_point_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,19 @@
end

describe "custom reports" do
before { SiteSetting.ai_sentiment_inference_service_api_endpoint = "http://test.com" }
before do
SiteSetting.ai_sentiment_model_configs =
"[{\"model_name\":\"SamLowe/roberta-base-go_emotions\",\"endpoint\":\"http://samlowe-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"j-hartmann/emotion-english-distilroberta-base\",\"endpoint\":\"http://jhartmann-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"cardiffnlp/twitter-roberta-base-sentiment-latest\",\"endpoint\":\"http://cardiffnlp-sentiment.com\",\"api_key\":\"123\"}]"
end

fab!(:pm) { Fabricate(:private_message_post) }

fab!(:post_1) { Fabricate(:post) }
fab!(:post_2) { Fabricate(:post) }

describe "overall_sentiment report" do
let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
let(:positive_classification) { { negative: 0.2, neutral: 0.3, positive: 0.7 } }
let(:negative_classification) { { negative: 0.65, neutral: 0.2, positive: 0.1 } }

def sentiment_classification(post, classification)
Fabricate(:sentiment_classification, target: post, classification: classification)
Expand All @@ -84,12 +87,28 @@ def sentiment_classification(post, classification)

describe "post_emotion report" do
let(:emotion_1) do
{ sadness: 49, surprise: 23, neutral: 6, fear: 34, anger: 87, joy: 22, disgust: 70 }
{
sadness: 0.49,
surprise: 0.23,
neutral: 0.6,
fear: 0.34,
anger: 0.87,
joy: 0.22,
disgust: 0.70,
}
end
let(:emotion_2) do
{ sadness: 19, surprise: 63, neutral: 45, fear: 44, anger: 27, joy: 62, disgust: 30 }
{
sadness: 0.19,
surprise: 0.63,
neutral: 0.45,
fear: 0.44,
anger: 0.27,
joy: 0.62,
disgust: 0.30,
}
end
let(:model_used) { "emotion" }
let(:model_used) { "j-hartmann/emotion-english-distilroberta-base" }

def emotion_classification(post, classification)
Fabricate(
Expand All @@ -106,7 +125,7 @@ def strip_emoji_and_downcase(str)
end

it "calculate averages using only public posts" do
threshold = 30
threshold = 0.30

emotion_classification(post_1, emotion_1)
emotion_classification(post_2, emotion_2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

before do
SiteSetting.ai_sentiment_enabled = true
SiteSetting.ai_sentiment_inference_service_api_endpoint = "http://test.com"
SiteSetting.ai_sentiment_model_configs =
"[{\"model_name\":\"SamLowe/roberta-base-go_emotions\",\"endpoint\":\"http://samlowe-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"j-hartmann/emotion-english-distilroberta-base\",\"endpoint\":\"http://jhartmann-emotion.com\",\"api_key\":\"123\"},{\"model_name\":\"cardiffnlp/twitter-roberta-base-sentiment-latest\",\"endpoint\":\"http://cardiffnlp-sentiment.com\",\"api_key\":\"123\"}]"
end

describe "scenarios where we return early without doing anything" do
Expand Down Expand Up @@ -42,7 +43,8 @@
end

it "successfully classifies the post" do
expected_analysis = SiteSetting.ai_sentiment_models.split("|").length
expected_analysis =
DiscourseAi::Sentiment::SentimentClassification.new.available_classifiers.length
SentimentInferenceStubs.stub_classification(post)

subject.execute({ post_id: post.id })
Expand Down
Loading