diff --git a/plugin.rb b/plugin.rb index 0f379ab8..d7f8205f 100644 --- a/plugin.rb +++ b/plugin.rb @@ -17,6 +17,7 @@ module ::DiscourseTranslator PLUGIN_NAME = "discourse_translator".freeze DETECTED_LANG_CUSTOM_FIELD = "post_detected_lang".freeze TRANSLATED_CUSTOM_FIELD = "translated_text".freeze + LANG_DETECT_NEEDED = "lang_detect_needed".freeze autoload :Microsoft, "#{Rails.root}/plugins/discourse-translator/services/discourse_translator/microsoft" @@ -134,9 +135,44 @@ def execute(args) end end end - end - on(:post_process) { |post| Jobs.enqueue(:detect_translation, post_id: post.id) } + class DetectPostsTranslation < ::Jobs::Scheduled + sidekiq_options retry: false + every 5.minutes + + BATCH_SIZE = 100 + MAX_QUEUE_SIZE = 1000 + + def execute(args) + return unless SiteSetting.translator_enabled + + post_ids = Discourse.redis.spop(DiscourseTranslator::LANG_DETECT_NEEDED, MAX_QUEUE_SIZE) + return if post_ids.blank? + + post_ids.each_slice(BATCH_SIZE) { |batch| process_batch(batch) } + end + + private + + def process_batch(post_ids) + posts = Post.where(id: post_ids).to_a + posts.each do |post| + DistributedMutex.synchronize("detect_translation_#{post.id}") do + begin + translator = "DiscourseTranslator::#{SiteSetting.translator}".constantize + translator.detect(post) + if !post.custom_fields_clean? 
+ post.save_custom_fields + post.publish_change_to_clients!(:revised) + end + rescue ::DiscourseTranslator::ProblemCheckedTranslationError + # swallow problem-checked translation errors so the remaining posts are still processed + end + end + end + end + end + end topic_view_post_custom_fields_allowlister { [::DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] } @@ -160,7 +196,7 @@ def execute(args) detected_lang = post_custom_fields[::DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] if !detected_lang - Jobs.enqueue(:detect_translation, post_id: object.id) + Discourse.redis.sadd?(DiscourseTranslator::LANG_DETECT_NEEDED, object.id) false else detected_lang != diff --git a/spec/jobs/detect_posts_translation_spec.rb b/spec/jobs/detect_posts_translation_spec.rb new file mode 100644 index 00000000..eb578659 --- /dev/null +++ b/spec/jobs/detect_posts_translation_spec.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +require "aws-sdk-translate" + +describe Jobs::DetectPostsTranslation do + fab!(:posts) { Fabricate.times(5, :post) } + let(:redis_key) { DiscourseTranslator::LANG_DETECT_NEEDED } + + before do + SiteSetting.translator_enabled = true + SiteSetting.translator = "Amazon" + client = Aws::Translate::Client.new(stub_responses: true) + client.stub_responses( + :translate_text, + { translated_text: "大丈夫", source_language_code: "en", target_language_code: "jp" }, + ) + Aws::Translate::Client.stubs(:new).returns(client) + posts.each { |post| Discourse.redis.sadd?(redis_key, post.id) } + end + + it "processes posts in batches and updates their translations" do + described_class.new.execute({}) + + posts.each do |post| + post.reload + expect(post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD]).not_to be_nil + end + + expect(Discourse.redis.smembers(redis_key)).to be_empty + end + + it "does not process posts if the translator is disabled" do + SiteSetting.translator_enabled = false + described_class.new.execute({}) + + posts.each do |post| + post.reload + 
 expect(post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD]).to be_nil + end + + expect(Discourse.redis.smembers(redis_key)).to match_array(posts.map(&:id).map(&:to_s)) + end + + it "processes a maximum of MAX_QUEUE_SIZE posts per run" do + large_number = 2000 + large_number.times { |i| Discourse.redis.sadd?(redis_key, i + 1) } + described_class.new.execute({}) + + remaining = Discourse.redis.scard(redis_key) + expect(remaining).to eq(large_number - Jobs::DetectPostsTranslation::MAX_QUEUE_SIZE) + end + + it "handles an empty Redis queue gracefully" do + Discourse.redis.del(redis_key) + expect { described_class.new.execute({}) }.not_to raise_error + end + + it "removes successfully processed posts from Redis" do + described_class.new.execute({}) + + posts.each { |post| expect(Discourse.redis.sismember(redis_key, post.id)).to be_falsey } + end + + it "skips posts that no longer exist" do + non_existent_post_id = -1 + Discourse.redis.sadd?(redis_key, non_existent_post_id) + + expect { described_class.new.execute({}) }.not_to raise_error + + expect(Discourse.redis.sismember(redis_key, non_existent_post_id)).to be_falsey + end + + it "ensures posts are processed within a distributed mutex" do + allow(DistributedMutex).to receive(:synchronize).and_yield + + described_class.new.execute({}) + + posts.each do |post| + expect(DistributedMutex).to have_received(:synchronize).with("detect_translation_#{post.id}") + end + end +end diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index 16c86c90..41902926 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -3,6 +3,8 @@ require "rails_helper" RSpec.describe Post do + before { SiteSetting.translator_enabled = true } + describe "translator custom fields" do let(:post) do Fabricate( @@ -17,10 +19,6 @@ ) end - before { SiteSetting.translator_enabled = true } - - after { SiteSetting.translator_enabled = false } - it "should reset custom fields when post has been updated" do 
 post.update!(raw: "this is an updated post") diff --git a/spec/serializers/post_serializer_spec.rb b/spec/serializers/post_serializer_spec.rb index 541aeada..928c3ae5 100644 --- a/spec/serializers/post_serializer_spec.rb +++ b/spec/serializers/post_serializer_spec.rb @@ -77,10 +77,13 @@ expect(serializer.can_translate).to eq(false) end - it "enqueues detect translation job" do + it "adds post id to redis if detected_language is blank" do + post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] = nil + post.save_custom_fields + expect { serializer.can_translate }.to change { - Jobs::DetectTranslation.jobs.size - }.by(1) + Discourse.redis.sismember(DiscourseTranslator::LANG_DETECT_NEEDED, post.id) + }.from(false).to(true) end end