Skip to content

Commit 44c6d45

Browse files
committed
Add RichText link checker functionality and related specs
- Implement RichTextLinkIdentifier service to extract and persist links from ActionText::RichText records. - Create ExternalLinkCheckerJob and InternalLinkCheckerJob for checking link validity. - Update Rake tasks for identifying and checking links. - Add specs for the new jobs and the RichTextLinkIdentifier service. - Include WebMock for stubbing HTTP requests in tests. - Add diagrams and documentation for the link checking process. - Introduce WebMock gem for stubbing external HTTP requests in specs.
1 parent 9f436a5 commit 44c6d45

File tree

14 files changed

+291
-107
lines changed

14 files changed

+291
-107
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ group :test do
9898
# Capybara for integration testing
9999
gem 'capybara', '>= 2.15'
100100
gem 'capybara-screenshot'
101+
# WebMock for stubbing external HTTP requests in specs
102+
gem 'webmock'
101103
# Coveralls for test coverage reporting
102104
gem 'coveralls_reborn', require: false
103105
# Database cleaner for test database cleaning

Gemfile.lock

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,9 @@ GEM
229229
term-ansicolor (~> 1.7)
230230
thor (~> 1.2)
231231
tins (~> 1.32)
232+
crack (1.0.0)
233+
bigdecimal
234+
rexml
232235
crass (1.0.6)
233236
css_parser (1.21.1)
234237
addressable
@@ -362,6 +365,7 @@ GEM
362365
rake (>= 13)
363366
groupdate (6.7.0)
364367
activesupport (>= 7.1)
368+
hashdiff (1.2.0)
365369
hashie (5.0.0)
366370
highline (3.1.2)
367371
reline
@@ -801,6 +805,10 @@ GEM
801805
activemodel (>= 6.0.0)
802806
bindex (>= 0.4.0)
803807
railties (>= 6.0.0)
808+
webmock (3.25.1)
809+
addressable (>= 2.8.0)
810+
crack (>= 0.3.2)
811+
hashdiff (>= 0.4.0, < 2.0.0)
804812
websocket (1.2.11)
805813
websocket-driver (0.8.0)
806814
base64
@@ -873,6 +881,7 @@ DEPENDENCIES
873881
storext!
874882
uglifier (>= 1.3.0)
875883
web-console (>= 3.3.0)
884+
webmock
876885

877886
RUBY VERSION
878887
ruby 3.4.4p34
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# frozen_string_literal: true
2+
3+
require 'net/http'
4+
require 'uri'
5+
6+
module BetterTogether
  module Metrics
    # Checks one external link with an HTTP HEAD request and records the
    # outcome (timestamp, status code, validity, error message) on its
    # BetterTogether::Content::Link row.
    class ExternalLinkCheckerJob < ApplicationJob
      queue_as :default

      # @param link_id [Integer, String] id of the BetterTogether::Content::Link to check
      # @return [void]
      #
      # The lookup happens outside the rescue in #check: if `find` itself
      # raises there is no link to update, so that error is allowed to
      # propagate instead of being masked by a NoMethodError on nil.
      def perform(link_id)
        link = BetterTogether::Content::Link.find(link_id)
        check(link)
      end

      private

      # Performs the request and persists the result. Any failure while
      # parsing, connecting or saving is recorded on the link as an invalid
      # result together with the exception message.
      def check(link)
        response = http_head(URI.parse(link.url))

        link.update!(
          last_checked_at: Time.current,
          latest_status_code: response.code.to_s,
          valid_link: response.is_a?(Net::HTTPSuccess),
          error_message: nil # clear any message left by an earlier failed check
        )
      rescue StandardError => e
        link.update!(
          last_checked_at: Time.current,
          latest_status_code: nil,
          valid_link: false,
          error_message: e.message
        )
      end

      # Issues a HEAD request with 5 second open/read timeouts.
      #
      # @param uri [URI::Generic] parsed link URL
      # @return [Net::HTTPResponse]
      # @raise [ArgumentError] when the URL is not HTTP(S) or has no host
      def http_head(uri)
        # Only URI::HTTP (and its subclass URI::HTTPS) respond to #request_uri,
        # and a nil host must never be handed to Net::HTTP.start.
        raise ArgumentError, "not a checkable HTTP(S) URL: #{uri}" unless uri.is_a?(URI::HTTP) && uri.host

        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5, read_timeout: 5) do |http|
          http.request(Net::HTTP::Head.new(uri.request_uri))
        end
      end
    end
  end
end
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# frozen_string_literal: true
2+
3+
require 'net/http'
4+
require 'uri'
5+
6+
module BetterTogether
  module Metrics
    # Checks one internal link with an HTTP HEAD request and records the
    # outcome (timestamp, status code, validity, error message) on its
    # BetterTogether::Content::Link row.
    class InternalLinkCheckerJob < ApplicationJob
      queue_as :default

      # @param link_id [Integer, String] id of the BetterTogether::Content::Link to check
      # @return [void]
      #
      # The lookup happens outside the rescue in #check: if `find` itself
      # raises there is no link to update, so that error is allowed to
      # propagate instead of being masked by a NoMethodError on nil.
      def perform(link_id)
        link = BetterTogether::Content::Link.find(link_id)
        check(link)
      end

      private

      # Performs the request and persists the result. Any failure while
      # parsing, connecting or saving is recorded on the link as an invalid
      # result together with the exception message.
      def check(link)
        response = http_head(URI.parse(link.url))

        link.update!(
          last_checked_at: Time.current,
          latest_status_code: response.code.to_s,
          valid_link: response.is_a?(Net::HTTPSuccess),
          error_message: nil # clear any message left by an earlier failed check
        )
      rescue StandardError => e
        link.update!(
          last_checked_at: Time.current,
          latest_status_code: nil,
          valid_link: false,
          error_message: e.message
        )
      end

      # Issues a HEAD request with 5 second open/read timeouts.
      #
      # @param uri [URI::Generic] parsed link URL
      # @return [Net::HTTPResponse]
      # @raise [ArgumentError] when the URL is not HTTP(S) or has no host
      def http_head(uri)
        # Only URI::HTTP (and its subclass URI::HTTPS) respond to #request_uri,
        # and a nil host must never be handed to Net::HTTP.start.
        raise ArgumentError, "not a checkable HTTP(S) URL: #{uri}" unless uri.is_a?(URI::HTTP) && uri.host

        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5, read_timeout: 5) do |http|
          http.request(Net::HTTP::Head.new(uri.request_uri))
        end
      end
    end
  end
end

app/jobs/better_together/metrics/rich_text_external_link_checker_queue_job.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ class RichTextExternalLinkCheckerQueueJob < RichTextLinkCheckerQueueJob
1111
# Narrows the collection returned by the parent class (via super) to rows
# whose link_type is 'external'.
def model_collection
1212
super.where(link_type: 'external')
1313
end
14+
15+
# Returns the job class that checks a single external link.
# NOTE(review): presumably read by the parent queue job when it enqueues
# per-link work — confirm against RichTextLinkCheckerQueueJob.
def child_job_class
16+
BetterTogether::Metrics::ExternalLinkCheckerJob
17+
end
1418
end
1519
end
1620
end

app/jobs/better_together/metrics/rich_text_internal_link_checker_queue_job.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ def model_collection
1515
# Returns a five-minute duration.
# NOTE(review): how the parent queue job applies this delay is not visible
# here — confirm against RichTextLinkCheckerQueueJob.
def queue_delay
1616
5.minutes
1717
end
18+
19+
# Returns the job class that checks a single internal link.
# NOTE(review): presumably read by the parent queue job when it enqueues
# per-link work — confirm against RichTextLinkCheckerQueueJob.
def child_job_class
20+
BetterTogether::Metrics::InternalLinkCheckerJob
21+
end
1822
end
1923
end
2024
end
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# frozen_string_literal: true
2+
3+
module BetterTogether
  module Metrics
    # Service to scan ActionText::RichText records, extract links, and persist
    # both the link metadata (BetterTogether::Content::Link) and the join
    # records (BetterTogether::Metrics::RichTextLink).
    #
    # The service is safe to re-run: Link rows are looked up by URL and join
    # rows by (rich_text_id, position, locale), so a second pass updates the
    # existing rows instead of inserting duplicates.
    #
    # NOTE(review): the rake implementation this replaces also populated
    # `link_type` on every link; this service does not. Confirm that whatever
    # filters on `link_type` downstream still matches these records.
    #
    # Usage:
    #   BetterTogether::Metrics::RichTextLinkIdentifier.call
    #   BetterTogether::Metrics::RichTextLinkIdentifier.call(rich_texts: some_scope_or_array)
    class RichTextLinkIdentifier
      # @param rich_texts [#find_each, #each, nil] rich texts to scan; defaults
      #   to every ActionText::RichText with a non-nil body
      # @return [Hash{Symbol => Integer}] `{ valid: n, invalid: m }`
      def self.call(rich_texts: nil)
        new(rich_texts: rich_texts).call
      end

      # @param rich_texts [#find_each, #each, nil] see .call
      def initialize(rich_texts: nil)
        @rich_texts = rich_texts
      end

      # Scans every rich text and persists one Link plus one join row per
      # distinct link found in it.
      #
      # @return [Hash{Symbol => Integer}] counts of links stored as valid / invalid
      def call
        counts = { valid: 0, invalid: 0 }

        each_rich_text do |rt|
          extract_links(rt).each_with_index do |link, index|
            counts[process_link(rt, index, link)] += 1
          end
        end

        counts
      end

      private

      attr_reader :rich_texts

      # Yields each rich text. Relations are walked in batches with find_each;
      # plain enumerables (e.g. an Array passed by a caller) fall back to each.
      def each_rich_text(&block)
        texts = rich_texts || ActionText::RichText.includes(:record).where.not(body: nil)
        texts.respond_to?(:find_each) ? texts.find_each(&block) : texts.each(&block)
      end

      # Persists a single link occurrence.
      #
      # @return [Symbol] :valid or :invalid, matching the counter to increment
      def process_link(rt, index, link)
        uri = parse_uri(link)

        # Neither host nor scheme (e.g. a bare path): cannot be classified.
        if uri.nil? || (uri.host.nil? && uri.scheme.nil?)
          create_invalid(rt, index, link, 'undetermined')
          return :invalid
        end

        persist_rich_text_link(rt, index, find_or_build_link(link, uri))
        :valid
      rescue URI::InvalidURIError
        create_invalid(rt, index, link, 'invalid_uri')
        :invalid
      end

      # Finds or creates the canonical Link row for a URL and refreshes its
      # host / scheme / external attributes.
      def find_or_build_link(link, uri)
        bt_link = BetterTogether::Content::Link.find_or_initialize_by(url: link)
        bt_link.host ||= uri.host
        bt_link.scheme ||= uri.scheme
        bt_link.external = external?(uri)
        bt_link.save! if bt_link.changed?
        bt_link
      end

      # A link is external when it has a host that differs from the platform host.
      def external?(uri)
        !uri.host.to_s.empty? && rt_platform_host != uri.host
      end

      # Creates or updates the join row for this position in the rich text.
      # Keyed by (rich_text_id, position, locale) — the same key the previous
      # rake implementation upserted on — so re-runs never duplicate rows.
      def persist_rich_text_link(rt, index, bt_link)
        join = BetterTogether::Metrics::RichTextLink.find_or_initialize_by(
          rich_text_id: rt.id,
          position: index,
          locale: rt.locale
        )
        join.assign_attributes(
          link_id: bt_link.id,
          rich_text_record_id: rt.record_id,
          rich_text_record_type: rt.record_type
        )
        join.save!
        join
      end

      # Distinct hrefs of a rich text body; an unreadable body yields no
      # links (deliberate best-effort).
      def extract_links(rt)
        # ActionText stores HTML; use the body helper to extract hrefs
        rt.body.links.uniq
      rescue StandardError
        []
      end

      # @raise [URI::InvalidURIError] when the link cannot be parsed
      def parse_uri(link)
        URI.parse(link)
      end

      # Records a link that could not be classified, reusing any existing Link
      # row for the same URL rather than inserting a new one on every run.
      def create_invalid(rt, index, link, invalid_type)
        bt_link = BetterTogether::Content::Link.find_or_initialize_by(url: link)
        bt_link.assign_attributes(valid_link: false, error_message: invalid_type)
        bt_link.save! if bt_link.changed?

        persist_rich_text_link(rt, index, bt_link)
      end

      # Host of the host platform's URL, or nil when it cannot be resolved.
      # Memoized with defined? so that a nil result is cached too instead of
      # being looked up again for every link.
      def rt_platform_host
        return @rt_platform_host if defined?(@rt_platform_host)

        @rt_platform_host = begin
          host_platform = BetterTogether::Platform.host.first
          URI(host_platform.url).host
        rescue StandardError
          nil
        end
      end
    end
  end
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
%% Mermaid source: RichText Link Checker Flow
2+
flowchart TD
3+
A[ActionText::RichText records] --> B[RichTextLinkIdentifier]
4+
B --> C[BetterTogether::Content::Link]
5+
B --> D[BetterTogether::Metrics::RichTextLink]
6+
E[Rake: check task] --> F["RichTextLinkCheckerQueueJob (internal)"]
7+
E --> G["RichTextLinkCheckerQueueJob (external)"]
8+
F --> H[InternalLinkCheckerJob]
9+
G --> I[ExternalLinkCheckerJob]
10+
H --> C
11+
I --> C

docs/rich_text_link_checker.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# RichText Link Checker
2+
3+
This document describes the pipeline that identifies links in ActionText rich content and checks them.
4+
5+
Process overview:
6+
7+
1. Identify: `BetterTogether::Metrics::RichTextLinkIdentifier` scans `ActionText::RichText` records and extracts links.
8+
2. Persist: For each link, create or find a `BetterTogether::Content::Link` and a `BetterTogether::Metrics::RichTextLink` join record.
9+
3. Queue: The `better_together:qa:rich_text:links:check` Rake task enqueues two queue jobs: one for internal links and one for external links.
10+
4. Distribute: `RichTextLinkCheckerQueueJob` groups links by host and schedules child check jobs over a time window to avoid bursts.
11+
5. Check: Child jobs (`InternalLinkCheckerJob` and `ExternalLinkCheckerJob`) perform HTTP HEAD requests and update Link metadata.
12+
13+
Documentation files:
14+
- diagrams/source/rich_text_link_checker_flow.mmd (Mermaid source)
15+
- diagrams/exports/png/rich_text_link_checker_flow.png (export placeholder)
16+
17+
Running locally:
18+
19+
Use the Docker wrapper for commands that need DB access (see the repo README):
20+
21+
```
22+
bin/dc-run rails runner "BetterTogether::Metrics::RichTextLinkIdentifier.call"
23+
bin/dc-run rake better_together:qa:rich_text:links:identify
24+
bin/dc-run rake better_together:qa:rich_text:links:check
25+
```
26+
27+
Notes:
28+
- External HTTP checks are rate-limited by the queueing logic. Configure behavior in the queue job if needed.
29+
- Tests use WebMock to stub external HTTP calls.

lib/tasks/quality_assurance.rake

Lines changed: 5 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -8,117 +8,15 @@ namespace :better_together do
88
namespace :links do
99
desc 'Generates report of status of RichText links'
1010
task identify: :environment do
11-
require 'uri'
12-
13-
host_platform = BetterTogether::Platform.host.first
14-
platform_uri = URI(host_platform.url)
15-
16-
rich_texts = ActionText::RichText.includes(:record).where.not(body: nil)
17-
puts 'rich text count:', rich_texts.size
18-
19-
valid_rich_text_links = []
20-
invalid_rich_text_links = []
21-
22-
rich_texts.each do |rt|
23-
links = rt.body.links.uniq # Deduplicate links within the same rich text
24-
next unless links.any?
25-
26-
links.each_with_index do |link, index|
27-
uri = URI.parse(link)
28-
29-
internal_link = uri.host == platform_uri.host
30-
link_type = determine_link_type(uri, internal_link)
31-
32-
if uri.host.nil? && uri.scheme.nil?
33-
invalid_type = if uri.path
34-
'path'
35-
elsif link.include?('mailto')
36-
'email'
37-
elsif link.include?('tel')
38-
'phone'
39-
else
40-
'undetermined'
41-
end
42-
43-
invalid_rich_text_links << {
44-
rich_text_id: rt.id,
45-
rich_text_record_id: rt.record_id,
46-
rich_text_record_type: rt.record_type,
47-
locale: rt.locale,
48-
position: index, # Track the first position for clarity
49-
50-
link_attributes: {
51-
url: link,
52-
link_type: "invalid:#{invalid_type}",
53-
valid_link: false,
54-
error_message: 'No host or scheme. Needs review.'
55-
}
56-
}
57-
58-
next
59-
end
60-
61-
valid_rich_text_links << {
62-
rich_text_id: rt.id,
63-
rich_text_record_id: rt.record_id,
64-
rich_text_record_type: rt.record_type,
65-
locale: rt.locale,
66-
position: index, # Track the first position for clarity
67-
68-
link_attributes: {
69-
url: link,
70-
host: uri.host,
71-
link_type: link_type,
72-
valid_link: true,
73-
external: !internal_link
74-
}
75-
}
76-
rescue URI::InvalidURIError => e
77-
invalid_type = if link.include?('mailto')
78-
'email'
79-
elsif link.include?('tel')
80-
'phone'
81-
else
82-
'undetermined'
83-
end
84-
85-
invalid_rich_text_links << {
86-
rich_text_id: rt.id,
87-
rich_text_record_id: rt.record_id,
88-
rich_text_record_type: rt.record_type,
89-
locale: rt.locale,
90-
position: index, # Track the first position for clarity
91-
92-
link_attributes: {
93-
url: link,
94-
link_type: "invalid:#{invalid_type}",
95-
valid_link: false,
96-
error_message: e.message
97-
}
98-
}
99-
end
100-
end
101-
102-
# Upsert valid and invalid links
103-
if valid_rich_text_links.any?
104-
BetterTogether::Metrics::RichTextLink.upsert_all(valid_rich_text_links,
105-
unique_by: %i[rich_text_id position
106-
locale])
107-
end
108-
if invalid_rich_text_links.any?
109-
BetterTogether::Metrics::RichTextLink.upsert_all(invalid_rich_text_links,
110-
unique_by: %i[rich_text_id position
111-
locale])
112-
end
113-
114-
puts "Valid links processed: #{valid_rich_text_links.size}"
115-
puts "Invalid links processed: #{invalid_rich_text_links.size}"
11+
result = BetterTogether::Metrics::RichTextLinkIdentifier.call
12+
puts "Valid links processed: #{result[:valid]}"
13+
puts "Invalid links processed: #{result[:invalid]}"
11614
end
11715

11816
desc 'checks rich text links and returns their status code'
11917
task check: :environment do
120-
BetterTogether::Metrics::RichTextInternalLinkCheckerQueueJob.new
121-
BetterTogether::Metrics::RichTextExternalLinkCheckerQueueJob.new
18+
BetterTogether::Metrics::RichTextInternalLinkCheckerQueueJob.perform_later
19+
BetterTogether::Metrics::RichTextExternalLinkCheckerQueueJob.perform_later
12220
end
12321

12422
def determine_link_type(uri, internal_link)

0 commit comments

Comments
 (0)