From 9f7de054b5565f303440b2f5741d42b6275b3c4a Mon Sep 17 00:00:00 2001 From: tomerqodo Date: Thu, 4 Dec 2025 22:41:33 +0200 Subject: [PATCH] Apply changes for benchmark PR --- .../automation/config/locales/server.en.yml | 4 + .../lib/discourse_automation/scripts.rb | 1 + ...remove_upload_markup_from_deleted_posts.rb | 49 +++++ plugins/automation/plugin.rb | 2 + ...e_upload_markup_from_deleted_posts_spec.rb | 174 ++++++++++++++++++ 5 files changed, 230 insertions(+) create mode 100644 plugins/automation/lib/discourse_automation/scripts/remove_upload_markup_from_deleted_posts.rb create mode 100644 plugins/automation/spec/scripts/remove_upload_markup_from_deleted_posts_spec.rb diff --git a/plugins/automation/config/locales/server.en.yml b/plugins/automation/config/locales/server.en.yml index 228cd33acb119..087b11a30d60c 100644 --- a/plugins/automation/config/locales/server.en.yml +++ b/plugins/automation/config/locales/server.en.yml @@ -102,6 +102,10 @@ en: title: Gift exchange description: Allows to anonymously pair users of a group to send each other a gift. doc: Gift exchange requires an existing group with at least 3 users. At the chosen date each users of the group will be paired with one gifter and one giftee. + remove_upload_markup_from_deleted_posts: + title: Remove upload markup in deleted posts + description: Acts as system user and adds a revision to deleted posts where the upload markup is removed. In conjunction with the 'clean up uploads' job, this results in the permanent deletion of uploads in deleted posts. Use with caution! + edit_reason: Automation removed upload markup from deleted post send_pms: title: Send pms description: Allows to send PMs (possibly delayed). diff --git a/plugins/automation/lib/discourse_automation/scripts.rb b/plugins/automation/lib/discourse_automation/scripts.rb index 208f4b319266f..20704f513bcad 100644 --- a/plugins/automation/lib/discourse_automation/scripts.rb +++ b/plugins/automation/lib/discourse_automation/scripts.rb @@ -14,6 +14,7 @@ module Scripts GROUP_CATEGORY_NOTIFICATION_DEFAULT = "group_category_notification_default" PIN_TOPIC = "pin_topic" POST = "post" + REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS = "remove_upload_markup_from_deleted_posts" SEND_PMS = "send_pms" SUSPEND_USER_BY_EMAIL = "suspend_user_by_email" TOPIC = "topic" diff --git a/plugins/automation/lib/discourse_automation/scripts/remove_upload_markup_from_deleted_posts.rb b/plugins/automation/lib/discourse_automation/scripts/remove_upload_markup_from_deleted_posts.rb new file mode 100644 index 0000000000000..a9ccaec2d590f --- /dev/null +++ b/plugins/automation/lib/discourse_automation/scripts/remove_upload_markup_from_deleted_posts.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +# This script runs a revision to remove upload and attachment markup that are attached to posts that have been deleted. +# This automation is intended to be used with clean_up_uploads job. +# On the next clean_up_uploads job, uploads that are no longer referenced in any post will be deleted from the system. + +DiscourseAutomation::Scriptable.add( + DiscourseAutomation::Scripts::REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS, +) do + version 1 + + triggerables %i[recurring point_in_time] + + script do |trigger, fields| + uploads_removed_at = Time.now + edit_reason = + I18n.t("discourse_automation.scriptables.remove_upload_markup_from_deleted_posts.edit_reason") + + # it matches both ![alt|size](upload://key) and [small.pdf|attachment](upload://key.pdf) (Number Bytes) + upload_and_attachment_regex = + %r{!?\[([^\]|]+)(?:\|[^\]]*)?\]\(upload://([A-Za-z0-9_-]+)[^)]*\)(?:\s*\([^)]*\))?} + + Post + .with_deleted + .where.not(deleted_at: nil) + .joins(:upload_references) + .where("upload_references.target_type = 'Post'") + .joins( + "LEFT JOIN post_custom_fields ON posts.id = post_custom_fields.post_id AND post_custom_fields.name = 'uploads_removed_at'", + ) + .where("post_custom_fields.post_id IS NULL") + .distinct + .limit(DiscourseAutomation::REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS_BATCH_SIZE) + .each do |post| + if updated_raw = post.raw.gsub!(upload_and_attachment_regex, "") + if ok = + post.revise( + Discourse.system_user, + { raw: updated_raw, edit_reason: edit_reason }, + force_new_version: true, + skip_validations: true, + ) + post.custom_fields["uploads_removed_at"] = uploads_removed_at + post.save_custom_fields + end + end + end + end +end diff --git a/plugins/automation/plugin.rb b/plugins/automation/plugin.rb index 088ae8737a3e7..2046d3ba934f2 100644 --- a/plugins/automation/plugin.rb +++ b/plugins/automation/plugin.rb @@ -28,6 +28,7 @@ module ::DiscourseAutomation AUTO_RESPONDER_TRIGGERED_IDS = "auto_responder_triggered_ids_json" USER_GROUP_MEMBERSHIP_THROUGH_BADGE_BULK_MODIFY_START_COUNT = 1000 + REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS_BATCH_SIZE = 1000 def self.set_active_automation(id) Thread.current[:active_automation_id] = id @@ -55,6 +56,7 @@ def self.get_active_automation lib/discourse_automation/scripts/group_category_notification_default lib/discourse_automation/scripts/pin_topic lib/discourse_automation/scripts/post + lib/discourse_automation/scripts/remove_upload_markup_from_deleted_posts lib/discourse_automation/scripts/topic lib/discourse_automation/scripts/send_pms lib/discourse_automation/scripts/suspend_user_by_email diff --git a/plugins/automation/spec/scripts/remove_upload_markup_from_deleted_posts_spec.rb b/plugins/automation/spec/scripts/remove_upload_markup_from_deleted_posts_spec.rb new file mode 100644 index 0000000000000..326e620bfc37f --- /dev/null +++ b/plugins/automation/spec/scripts/remove_upload_markup_from_deleted_posts_spec.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +describe "RemoveUploadMarkupFromDeletedPosts" do + fab!(:topic) + fab!(:upload) + + fab!(:filename) { "small.pdf" } + fab!(:file) { file_from_fixtures(filename, "pdf") } + fab!(:file_upload) do + UploadCreator.new(file, filename, { skip_validations: true }).create_for( + Discourse.system_user.id, + ) + end + let!(:raw) do + "Hey it is a regular post with a link to [Discourse](https://www.discourse.org) and a #{upload.to_markdown} #{file_upload.to_markdown}" + end + + let!(:post) { Fabricate(:post, topic: topic, raw: raw) } + let!(:deleted_post) { Fabricate(:post, topic: topic, raw: raw, deleted_at: 1.month.ago) } + + let!(:upload_reference) { Fabricate(:upload_reference, upload: upload, target: post) } + let!(:deleted_upload_reference) do + Fabricate(:upload_reference, upload: upload, target: deleted_post) + end + + let!(:file_upload_reference) { Fabricate(:upload_reference, upload: file_upload, target: post) } + let!(:deleted_file_upload_reference) do + Fabricate(:upload_reference, upload: file_upload, target: deleted_post) + end + + before { SiteSetting.discourse_automation_enabled = true } + + context "when using recurring trigger" do + fab!(:automation) do + Fabricate( + :automation, + script: DiscourseAutomation::Scripts::REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS, + trigger: DiscourseAutomation::Triggers::RECURRING, + ) + end + + it "removes uploads from deleted posts" do + expect { + automation.trigger! + deleted_post.reload + }.to change { deleted_post.raw }.from(raw).to( + "Hey it is a regular post with a link to [Discourse](https://www.discourse.org) and a", + ) + + expect(post.raw).to eq(raw) + end + + it "does not remove uploads from non-deleted posts" do + expect { + automation.trigger! + post.reload + }.to_not change { post.raw } + + expect(post.custom_fields["uploads_removed_at"]).to be_nil + expect(upload_reference.reload).to be_present + expect(file_upload_reference.reload).to be_present + end + + it "adds a timestamp to the custom field uploads_removed_at" do + expect { + automation.trigger! + deleted_post.reload + }.to change { deleted_post.custom_fields["uploads_removed_at"] }.from(nil) + + expect(post.custom_fields["uploads_removed_at"]).to be_nil + end + + it "does not run again on posts that have already had uploads removed" do + deleted_post.custom_fields["uploads_removed_at"] = 1.day.ago + deleted_post.save_custom_fields + automation.trigger! + deleted_post.reload + expect(deleted_post.raw).to eq(raw) + end + + context "with clean_up_uploads job" do + fab!(:old_upload) { Fabricate(:upload, created_at: 2.days.ago) } + let!(:old_post_raw) do + "Hey it is a regular post with a link to [Discourse](https://www.discourse.org) and a #{old_upload.to_markdown}" + end + + let!(:old_post) { Fabricate(:post, topic: topic, raw: old_post_raw, deleted_at: 1.month.ago) } + let!(:old_upload_reference) do + Fabricate(:upload_reference, upload: old_upload, target: old_post) + end + + before do + SiteSetting.clean_up_uploads = true + SiteSetting.clean_orphan_uploads_grace_period_hours = 1 + end + + it "allows uploads to be deleted" do + automation.trigger! + old_post.reload + + expect { Jobs::CleanUpUploads.new.execute(nil) }.to change { + Upload.exists?(old_upload.id) + }.from(true).to(false) + + expect(UploadReference.exists?(old_upload_reference.id)).to be_falsey + expect(old_post.raw).to eq( + "Hey it is a regular post with a link to [Discourse](https://www.discourse.org) and a", + ) + end + + it "requires automation to remove upload references first" do + old_post.reload + + expect { Jobs::CleanUpUploads.new.execute(nil) }.to_not change { + Upload.exists?(old_upload.id) + }.from(true) + + expect(UploadReference.exists?(old_upload_reference.id)).to be_truthy + end + end + end + + context "when using point_in_time trigger" do + fab!(:automation) do + Fabricate( + :automation, + script: DiscourseAutomation::Scripts::REMOVE_UPLOAD_MARKUP_FROM_DELETED_POSTS, + trigger: DiscourseAutomation::Triggers::POINT_IN_TIME, + ) + end + + before do + automation.upsert_field!( + "execute_at", + "date_time", + { value: 3.hours.from_now }, + target: "trigger", + ) + end + + it "removes uploads from deleted posts" do + freeze_time 6.hours.from_now do + expect { + Jobs::DiscourseAutomation::Tracker.new.execute + deleted_post.reload + }.to change { deleted_post.raw }.from(raw).to( + "Hey it is a regular post with a link to [Discourse](https://www.discourse.org) and a", + ) + + expect(post.raw).to eq(raw) + end + end + + it "does not remove uploads from non-deleted posts" do + expect { + automation.trigger! + post.reload + }.to_not change { post.raw } + + expect(post.custom_fields["uploads_removed_at"]).to be_nil + expect(upload_reference.reload).to be_present + expect(file_upload_reference.reload).to be_present + end + + it "adds a timestamp to the custom field uploads_removed_at" do + expect { + automation.trigger! + deleted_post.reload + }.to change { deleted_post.custom_fields["uploads_removed_at"] }.from(nil) + + expect(post.custom_fields["uploads_removed_at"]).to be_nil + end + end +end