Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
024f596
WIP: initial rake task for rich text link QA
rsmithlal Nov 24, 2024
d5ecc75
WIP: content link data type and rich text link metric
rsmithlal Nov 25, 2024
9038c08
Merge branch 'main' into feature/metrics/rich-text-links
rsmithlal Mar 7, 2025
ee7821b
Merge branch 'main' into feature/metrics/rich-text-links
rsmithlal Aug 7, 2025
4993dfd
Merge branch 'main' into feature/metrics/rich-text-links
rsmithlal Aug 29, 2025
3d1076f
Merge branch 'main' into feature/metrics/rich-text-links
rsmithlal Sep 2, 2025
e01bed8
Rubocop fixes
rsmithlal Sep 2, 2025
ce919ec
Rubocop fixes
rsmithlal Sep 2, 2025
067c5bf
Rubocop fixes
rsmithlal Sep 2, 2025
9f436a5
Rubocop fixes
rsmithlal Sep 2, 2025
44c6d45
Add RichText link checker functionality and related specs
rsmithlal Sep 2, 2025
81e586c
Add migration for better_together_metrics_rich_text_links and update …
rsmithlal Sep 2, 2025
be96825
Rubocop fixes
rsmithlal Sep 2, 2025
cec750e
Refactor RichText link handling and improve service documentation
rsmithlal Sep 2, 2025
b61f8d8
Refactor link processing logic in RichTextLinkIdentifier for clarity …
rsmithlal Sep 2, 2025
8156153
Rubocop fixes
rsmithlal Sep 2, 2025
fe419af
Add Link Checker report functionality with associated views, mailer, …
rsmithlal Sep 2, 2025
f04af71
Add scheduling and testing for daily link checker report functionality
rsmithlal Sep 2, 2025
583e4f7
Add Sidekiq scheduling for link checker and event reminder jobs, alon…
rsmithlal Sep 2, 2025
8e5911e
Implement Link Checker Reports functionality with CRUD operations, vi…
rsmithlal Sep 2, 2025
635d2c0
Rubocop fixes
rsmithlal Sep 2, 2025
a449930
Refactor tab navigation for metrics reports to improve accessibility …
rsmithlal Sep 2, 2025
8ec3b13
Refactor RSpec tests for ReportPORO and LinkCheckerReportsController …
rsmithlal Sep 2, 2025
ef124d9
Rubocop fixes
rsmithlal Sep 2, 2025
53f873a
Add migrations for rich text link associations and metrics link check…
rsmithlal Sep 2, 2025
19f4164
Refactor RichTextLinkCheckerQueueJob to use BetterTogether::Content::…
rsmithlal Sep 2, 2025
d48eddc
Refactor RichTextLink and related migrations to improve link handling…
rsmithlal Sep 3, 2025
148885f
Add link checker report translations to English, Spanish, and French …
rsmithlal Sep 3, 2025
1e20be2
Improve error handling assertions in HttpLinkChecker spec
rsmithlal Sep 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ group :test do
# Capybara for integration testing
gem 'capybara', '>= 2.15'
gem 'capybara-screenshot'
# WebMock for stubbing external HTTP requests in specs
gem 'webmock'
# Coveralls for test coverage reporting
gem 'coveralls_reborn', require: false
# Database cleaner for test database cleaning
Expand Down
9 changes: 9 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ GEM
term-ansicolor (~> 1.7)
thor (~> 1.2)
tins (~> 1.32)
crack (1.0.0)
bigdecimal
rexml
crass (1.0.6)
css_parser (1.21.1)
addressable
Expand Down Expand Up @@ -362,6 +365,7 @@ GEM
rake (>= 13)
groupdate (6.7.0)
activesupport (>= 7.1)
hashdiff (1.2.0)
hashie (5.0.0)
highline (3.1.2)
reline
Expand Down Expand Up @@ -801,6 +805,10 @@ GEM
activemodel (>= 6.0.0)
bindex (>= 0.4.0)
railties (>= 6.0.0)
webmock (3.25.1)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
websocket (1.2.11)
websocket-driver (0.8.0)
base64
Expand Down Expand Up @@ -873,6 +881,7 @@ DEPENDENCIES
storext!
uglifier (>= 1.3.0)
web-console (>= 3.3.0)
webmock

RUBY VERSION
ruby 3.4.4p34
Expand Down
31 changes: 31 additions & 0 deletions app/jobs/better_together/metrics/external_link_checker_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true

require 'net/http'
require 'uri'

module BetterTogether
  module Metrics
    # Background job that sends an HTTP HEAD request to the URL of a stored
    # BetterTogether::Content::Link and records the outcome (timestamp, status
    # code, validity, error message) on that record.
    class ExternalLinkCheckerJob < ApplicationJob
      queue_as :default

      # Checks one link and persists the result.
      #
      # @param link_id [Object] primary key of the BetterTogether::Content::Link to check
      # @return [void]
      def perform(link_id)
        # The link may have been deleted between enqueueing and execution.
        # Looking it up with find_by (instead of find inside a rescue that then
        # dereferences the missing record) lets the job finish quietly.
        link = BetterTogether::Content::Link.find_by(id: link_id)
        return unless link

        # Only the network check is rescued; a failing update! is a real
        # persistence error and is allowed to propagate.
        link.update!(check_result(link.url))
      end

      private

      # Performs the HEAD request and translates the outcome into attributes.
      #
      # @param url [String] the URL stored on the link
      # @return [Hash] attributes to write back onto the link
      def check_result(url)
        response = http_head(URI.parse(url))

        {
          last_checked_at: Time.current,
          latest_status_code: response.code.to_s,
          # Only 2xx responses count as valid; redirects and errors do not.
          valid_link: response.is_a?(Net::HTTPSuccess),
          # Clear any message left over from an earlier failed check.
          error_message: nil
        }
      rescue StandardError => e
        # Parse errors, DNS failures, timeouts, TLS errors, etc. all mark the
        # link as invalid and keep the reason for later inspection.
        { last_checked_at: Time.current, latest_status_code: nil, valid_link: false, error_message: e.message }
      end

      # Issues a HEAD request with 5-second open and read timeouts.
      #
      # @param uri [URI::Generic] parsed target
      # @return [Net::HTTPResponse]
      def http_head(uri)
        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5, read_timeout: 5) do |http|
          http.request(Net::HTTP::Head.new(uri.request_uri))
        end
      end
    end
  end
end
31 changes: 31 additions & 0 deletions app/jobs/better_together/metrics/internal_link_checker_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true

require 'net/http'
require 'uri'

module BetterTogether
  module Metrics
    # Background job that sends an HTTP HEAD request to the URL of a stored
    # BetterTogether::Content::Link and records the outcome (timestamp, status
    # code, validity, error message) on that record.
    class InternalLinkCheckerJob < ApplicationJob
      queue_as :default

      # Checks one link and persists the result.
      #
      # @param link_id [Object] primary key of the BetterTogether::Content::Link to check
      # @return [void]
      def perform(link_id)
        # The link may have been deleted between enqueueing and execution.
        # Looking it up with find_by (instead of find inside a rescue that then
        # dereferences the missing record) lets the job finish quietly.
        link = BetterTogether::Content::Link.find_by(id: link_id)
        return unless link

        # Only the network check is rescued; a failing update! is a real
        # persistence error and is allowed to propagate.
        link.update!(check_result(link.url))
      end

      private

      # Performs the HEAD request and translates the outcome into attributes.
      #
      # @param url [String] the URL stored on the link
      # @return [Hash] attributes to write back onto the link
      def check_result(url)
        response = http_head(URI.parse(url))

        {
          last_checked_at: Time.current,
          latest_status_code: response.code.to_s,
          # Only 2xx responses count as valid; redirects and errors do not.
          valid_link: response.is_a?(Net::HTTPSuccess),
          # Clear any message left over from an earlier failed check.
          error_message: nil
        }
      rescue StandardError => e
        # Parse errors, DNS failures, timeouts, TLS errors, etc. all mark the
        # link as invalid and keep the reason for later inspection.
        { last_checked_at: Time.current, latest_status_code: nil, valid_link: false, error_message: e.message }
      end

      # Issues a HEAD request with 5-second open and read timeouts.
      #
      # @param uri [URI::Generic] parsed target
      # @return [Net::HTTPResponse]
      def http_head(uri)
        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5, read_timeout: 5) do |http|
          http.request(Net::HTTP::Head.new(uri.request_uri))
        end
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

module BetterTogether
  module Metrics
    # Queue job specialised for external links. It narrows the parent job's
    # collection to rows whose link_type is 'external' and names
    # ExternalLinkCheckerJob as the per-link job class.
    class RichTextExternalLinkCheckerQueueJob < RichTextLinkCheckerQueueJob
      protected

      # @return [Object] the parent collection restricted to external links
      def model_collection
        inherited_scope = super
        inherited_scope.where(link_type: 'external')
      end

      # @return [Class] job class used to check each selected link
      def child_job_class
        BetterTogether::Metrics::ExternalLinkCheckerJob
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# frozen_string_literal: true

module BetterTogether
  module Metrics
    # Queue job specialised for internal links. It narrows the parent job's
    # collection to rows whose link_type is 'internal', names
    # InternalLinkCheckerJob as the per-link job class, and exposes a
    # five-minute queue delay.
    class RichTextInternalLinkCheckerQueueJob < RichTextLinkCheckerQueueJob
      protected

      # @return [Object] the parent collection restricted to internal links
      def model_collection
        inherited_scope = super
        inherited_scope.where(link_type: 'internal')
      end

      # NOTE(review): confirm the parent job actually consults this hook.
      #
      # @return [ActiveSupport::Duration] delay intended to postpone internal checks
      def queue_delay
        5.minutes
      end

      # @return [Class] job class used to check each selected link
      def child_job_class
        BetterTogether::Metrics::InternalLinkCheckerJob
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# frozen_string_literal: true

module BetterTogether
  module Metrics
    # Base queueing job that distributes link check work across hosts.
    # It groups due links by host and schedules one child job per link, spacing
    # the jobs for a host evenly across a one-hour window so that no single
    # host (or the application itself) receives a burst of requests.
    #
    # Subclasses must implement `child_job_class` and may override
    # `model_collection`, `queue_delay` and `last_checked_threshold`.
    class RichTextLinkCheckerQueueJob < MetricsJob
      # Schedules child jobs for every link that is due for a check.
      #
      # @return [void]
      def perform
        return unless model_collection.exists?

        # Total time window for each host, in seconds (1 hour).
        time_window = 3600

        records_by_host.each do |host, link_count|
          next if link_count.zero?

          delay_between_requests = time_window / link_count.to_f
          queue_jobs_for_host(host, delay_between_requests)
        end
      end

      # @return [Hash] number of due links per host, busiest host first
      def records_by_host
        model_collection.group(:host)
                        .order('count_all DESC')
                        .count
      end

      protected

      # The child jobs look up BetterTogether::Content::Link by the id they are
      # given, so the ids enqueued here must come from that model.
      def model_class
        BetterTogether::Content::Link
      end

      # Links that are due: never checked, or last checked at or before the
      # threshold. The range is beginless (`..t`, i.e. `<= t`); an endless range
      # (`t..`) would instead select links checked *within* the threshold window.
      #
      # NOTE(review): rows whose valid_link is NULL are excluded by this scope;
      # confirm that newly discovered links get valid_link set elsewhere.
      def model_collection
        model_class.where(valid_link: true)
                   .where(last_checked_at: [nil, (..last_checked_lt)])
      end

      # Enqueues one child job per due link on the given host.
      #
      # @param host [String, nil] host shared by the links
      # @param delay_between_requests [Float] seconds between consecutive jobs
      # @return [void]
      def queue_jobs_for_host(host, delay_between_requests)
        first_run_at = Time.current + queue_delay

        model_collection.where(host: host).find_each.with_index do |link, index|
          schedule_time = first_run_at + (delay_between_requests * index).seconds
          child_job_class.set(wait_until: schedule_time).perform_later(link.id)
        end
      end

      # Extra delay applied before the first child job of each host runs.
      # Subclasses override this to postpone their checks.
      def queue_delay
        0.seconds
      end

      def child_job_class
        # Define this in subclasses (e.g., InternalLinkCheckerJob, ExternalLinkCheckerJob)
        raise NotImplementedError, 'Subclasses must implement `child_job_class`'
      end

      # Cut-off timestamp: links last checked at or before it are due again.
      def last_checked_lt
        Time.current - last_checked_threshold
      end

      # Minimum age of the previous check before a link is rechecked.
      def last_checked_threshold
        14.days
      end
    end
  end
end
14 changes: 14 additions & 0 deletions app/models/better_together/content/link.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

module BetterTogether
  module Content
    # A persisted link discovered in rich content. Each record is tied, through
    # BetterTogether::Metrics::RichTextLink join rows, to the rich texts and
    # the owning records in which the link occurs.
    class Link < ApplicationRecord
      # Join rows, one per occurrence of this link in a rich text.
      has_many :rich_text_links,
               class_name: 'BetterTogether::Metrics::RichTextLink',
               inverse_of: :link

      # Rich texts that contain this link.
      has_many :rich_text_links_rich_texts_placeholder, -> { none }, class_name: 'BetterTogether::Metrics::RichTextLink' if false
      has_many :rich_texts,
               through: :rich_text_links

      # NOTE(review): the source association on RichTextLink is a polymorphic
      # belongs_to; a has_many :through over a polymorphic source normally
      # needs :source_type - verify that this association can be loaded.
      has_many :rich_text_records,
               through: :rich_text_links
    end
  end
end
11 changes: 11 additions & 0 deletions app/models/better_together/links.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true

module BetterTogether
  # Namespace for links-related models. Exposes the table name prefix shared
  # by models placed under BetterTogether::Links.
  module Links
    class << self
      # @return [String] prefix prepended to table names in this namespace
      def table_name_prefix
        'better_together_links_'
      end
    end
  end
end
15 changes: 15 additions & 0 deletions app/models/better_together/metrics/rich_text_link.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module BetterTogether
  module Metrics
    # Join record for one occurrence of a link inside ActionText rich content.
    # It points at the Link itself, the rich text that contains it, and the
    # (polymorphic) record that owns that rich text.
    class RichTextLink < ApplicationRecord
      belongs_to :link,
                 class_name: 'BetterTogether::Content::Link'
      belongs_to :rich_text,
                 class_name: 'ActionText::RichText'
      belongs_to :rich_text_record,
                 polymorphic: true

      # Nested link attributes are ignored when no URL is supplied, and a link
      # can never be destroyed through this record.
      accepts_nested_attributes_for(
        :link,
        reject_if: ->(link_attrs) { link_attrs['url'].blank? },
        allow_destroy: false
      )
    end
  end
end
101 changes: 101 additions & 0 deletions app/services/better_together/metrics/rich_text_link_identifier.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# frozen_string_literal: true

module BetterTogether
  module Metrics
    # Service to scan ActionText::RichText records, extract links, and persist
    # both the link metadata (BetterTogether::Content::Link) and the join
    # records (BetterTogether::Metrics::RichTextLink).
    #
    # The service is safe to re-run: links and join rows are looked up before
    # they are created, for valid and invalid links alike.
    #
    # Usage:
    # BetterTogether::Metrics::RichTextLinkIdentifier.call
    class RichTextLinkIdentifier
      # @param rich_texts [#find_each, #each, nil] rich texts to scan; when nil,
      #   every ActionText::RichText with a non-null body is scanned
      # @return [Hash{Symbol => Integer}] counts under :valid and :invalid
      def self.call(rich_texts: nil)
        new(rich_texts: rich_texts).call
      end

      def initialize(rich_texts: nil)
        @rich_texts = rich_texts
      end

      # Scans every rich text and records each distinct link it contains.
      #
      # @return [Hash{Symbol => Integer}] counts under :valid and :invalid
      def call
        counts = { valid: 0, invalid: 0 }

        each_rich_text do |rt|
          extract_links(rt).each_with_index do |link, index|
            counts[process_link(rt, link, index)] += 1
          end
        end

        counts
      end

      private

      attr_reader :rich_texts

      # Iterates the rich texts in batches when the collection supports it
      # (an ActiveRecord relation) and falls back to plain #each otherwise, so
      # callers may also pass an Array.
      def each_rich_text(&block)
        texts = rich_texts || ActionText::RichText.includes(:record).where.not(body: nil)

        if texts.respond_to?(:find_each)
          texts.find_each(&block)
        else
          texts.each(&block)
        end
      end

      # Records one link occurrence.
      #
      # @return [Symbol] :valid or :invalid, matching the keys counted by #call
      def process_link(rt, link, index)
        uri = parse_uri(link)

        # A URI with neither host nor scheme cannot be classified.
        if uri.nil? || (uri.host.nil? && uri.scheme.nil?)
          create_invalid(rt, index, link, 'undetermined')
          return :invalid
        end

        bt_link = upsert_link(link, uri)
        BetterTogether::Metrics::RichTextLink.find_or_create_by!(join_attributes(rt, index, bt_link))
        :valid
      rescue URI::InvalidURIError
        create_invalid(rt, index, link, 'invalid_uri')
        :invalid
      end

      # Creates or refreshes the canonical Link record for a parsed URL.
      def upsert_link(link, uri)
        bt_link = BetterTogether::Content::Link.find_or_initialize_by(url: link)
        bt_link.host ||= uri.host
        bt_link.scheme ||= uri.scheme
        bt_link.external = (uri.host.present? && (rt_platform_host != uri.host))
        # link_type is NOT NULL in the links table, so it must be set before
        # the first save; the values match those the queue jobs filter on.
        bt_link.link_type ||= bt_link.external ? 'external' : 'internal'
        bt_link.save! if bt_link.changed?
        bt_link
      end

      # Attributes identifying one occurrence of a link in a rich text.
      def join_attributes(rt, index, bt_link)
        {
          link_id: bt_link.id,
          rich_text_id: rt.id,
          rich_text_record_id: rt.record_id,
          rich_text_record_type: rt.record_type,
          position: index,
          locale: rt.locale
        }
      end

      def extract_links(rt)
        # ActionText stores HTML; use the body helper to extract hrefs.
        # Extraction is best-effort: a rich text whose body cannot be read
        # contributes no links instead of aborting the whole scan.
        rt.body.links.uniq
      rescue StandardError
        []
      end

      def parse_uri(link)
        URI.parse(link)
      end

      # Persists a link that could not be parsed or classified, together with
      # its join row. Both are looked up first so repeated runs do not pile up
      # duplicate rows.
      #
      # NOTE(review): 'invalid' is used as the link_type of such links because
      # the column is NOT NULL - confirm the desired label.
      def create_invalid(rt, index, link, invalid_type)
        bt_link = BetterTogether::Content::Link.find_or_create_by!(url: link) do |new_link|
          new_link.link_type = 'invalid'
          new_link.valid_link = false
          new_link.error_message = invalid_type
        end

        BetterTogether::Metrics::RichTextLink.find_or_create_by!(join_attributes(rt, index, bt_link))
      end

      # Host of the first host platform, or nil when it cannot be determined.
      # Memoized with defined? so that a nil result is cached as well instead
      # of repeating the lookup for every link.
      def rt_platform_host
        return @rt_platform_host if defined?(@rt_platform_host)

        @rt_platform_host = begin
          host_platform = BetterTogether::Platform.host.first
          URI(host_platform.url).host
        rescue StandardError
          nil
        end
      end
    end
  end
end
21 changes: 21 additions & 0 deletions db/migrate/20241124181740_create_better_together_content_links.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

# Migration to create the persistent links table used by the
# BetterTogether rich text link metrics system.
#
# `create_bt_table` is a project helper that is not defined in this file;
# presumably it creates a table named from the given prefix and name and
# yields the table definition - TODO confirm which columns it adds itself.
class CreateBetterTogetherContentLinks < ActiveRecord::Migration[7.1]
# rubocop:disable Metrics/MethodLength
def change
create_bt_table :links, prefix: :better_together_content do |t|
# Required classification of the link; records cannot be saved without it.
t.string :link_type, null: false, index: true
# Required URL of the link; indexed for lookups by URL.
t.string :url, null: false, index: true
# URI components of the URL; host is indexed, scheme is not.
t.string :scheme
t.string :host, index: true
# Nullable flags with no default, so a new row holds NULL until set.
t.boolean :external, index: true
t.boolean :valid_link, index: true
# Outcome of the most recent check: when it ran, the status code
# (stored as a string) and any error message.
t.datetime :last_checked_at, index: true
t.string :latest_status_code, index: true
t.text :error_message
end
end
# rubocop:enable Metrics/MethodLength
end
Loading
Loading