From b27c027b1626df20898f446ba96c143fd3aead49 Mon Sep 17 00:00:00 2001 From: Robert Smith Date: Wed, 13 Aug 2025 08:38:36 -0230 Subject: [PATCH] Test sitemap includes only public pages --- Gemfile | 3 ++ Gemfile.lock | 4 ++ .../better_together/sitemaps_controller.rb | 15 ++++++++ .../better_together/sitemap_refresh_job.rb | 16 ++++++++ app/models/better_together/page.rb | 10 +++++ app/models/better_together/platform.rb | 2 + app/models/better_together/sitemap.rb | 16 ++++++++ .../better_together/application.html.erb | 1 + better_together.gemspec | 1 + config/routes.rb | 4 +- config/sitemap.rb | 34 +++++++++++++++++ ...1120000_create_better_together_sitemaps.rb | 11 ++++++ lib/tasks/sitemap.rake | 29 ++++++++++++++ spec/dummy/db/schema.rb | 11 +++++- .../sitemap_refresh_job_spec.rb | 30 +++++++++++++++ .../requests/better_together/sitemaps_spec.rb | 38 +++++++++++++++++++ 16 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 app/controllers/better_together/sitemaps_controller.rb create mode 100644 app/jobs/better_together/sitemap_refresh_job.rb create mode 100644 app/models/better_together/sitemap.rb create mode 100644 config/sitemap.rb create mode 100644 db/migrate/20250821120000_create_better_together_sitemaps.rb create mode 100644 lib/tasks/sitemap.rake create mode 100644 spec/jobs/better_together/sitemap_refresh_job_spec.rb create mode 100644 spec/requests/better_together/sitemaps_spec.rb diff --git a/Gemfile b/Gemfile index 6e9d2d967..408f8a74b 100644 --- a/Gemfile +++ b/Gemfile @@ -42,6 +42,9 @@ gem 'sentry-rails' gem 'sentry-ruby' gem 'stackprof' +# Sitemap generation +gem 'sitemap_generator' + # Storext for easier json attributes, custom fork for Better Together gem 'storext', github: 'better-together-org/storext' diff --git a/Gemfile.lock b/Gemfile.lock index 82c78c188..215367637 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -58,6 +58,7 @@ PATH rswag (>= 2.3.1, < 2.17.0) ruby-openai simple_calendar + sitemap_generator sprockets-rails stackprof stimulus-rails (~> 1.3) @@ -733,6 +734,8 @@ GEM logger (>= 1.6.2) rack (>= 3.1.0) redis-client (>= 0.23.2) + sitemap_generator (6.3.0) + builder (~> 3.0) simple_calendar (3.1.0) rails (>= 6.1) simplecov (0.22.0) @@ -864,6 +867,7 @@ DEPENDENCIES shoulda-callback-matchers shoulda-matchers sidekiq (~> 8.0.7) + sitemap_generator simplecov spring spring-watcher-listen (~> 2.1.0) diff --git a/app/controllers/better_together/sitemaps_controller.rb b/app/controllers/better_together/sitemaps_controller.rb new file mode 100644 index 000000000..2a0a98eba --- /dev/null +++ b/app/controllers/better_together/sitemaps_controller.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module BetterTogether + # Serves the generated sitemap stored in Active Storage + class SitemapsController < ApplicationController + def show + sitemap = Sitemap.current(helpers.host_platform) + if sitemap.file.attached? + redirect_to sitemap.file.url, allow_other_host: true + else + head :not_found + end + end + end +end diff --git a/app/jobs/better_together/sitemap_refresh_job.rb b/app/jobs/better_together/sitemap_refresh_job.rb new file mode 100644 index 000000000..46f195214 --- /dev/null +++ b/app/jobs/better_together/sitemap_refresh_job.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require 'rake' + +module BetterTogether + # Generates the sitemap in a background job so newly published pages are included + class SitemapRefreshJob < ApplicationJob + queue_as :default + + def perform + Rails.application.load_tasks unless Rake::Task.task_defined?('sitemap:refresh') + Rake::Task['sitemap:refresh'].invoke + Rake::Task['sitemap:refresh'].reenable + end + end +end diff --git a/app/models/better_together/page.rb b/app/models/better_together/page.rb index d67b2f681..3a52f8c99 100644 --- a/app/models/better_together/page.rb +++ b/app/models/better_together/page.rb @@ -48,6 +48,8 @@ class Page < ApplicationRecord scope :published, -> { where.not(published_at: nil).where('published_at <= ?', Time.zone.now) } scope :by_publication_date, -> { order(published_at: :desc) } + after_commit :refresh_sitemap, on: %i[create update destroy] + def hero_block @hero_block ||= blocks.where(type: 'BetterTogether::Content::Hero').with_attached_background_image_file.with_translations.first end @@ -96,5 +98,13 @@ def to_s def url "#{::BetterTogether.base_url_with_locale}/#{slug}" end + + private + + def refresh_sitemap + return if Rails.env.test? + + SitemapRefreshJob.perform_later + end end end diff --git a/app/models/better_together/platform.rb b/app/models/better_together/platform.rb index af449e47d..0236b478b 100644 --- a/app/models/better_together/platform.rb +++ b/app/models/better_together/platform.rb @@ -35,6 +35,8 @@ class Platform < ApplicationRecord has_one_attached :profile_image has_one_attached :cover_image + has_one :sitemap, class_name: '::BetterTogether::Sitemap', dependent: :destroy + has_many :platform_blocks, dependent: :destroy, class_name: 'BetterTogether::Content::PlatformBlock' has_many :blocks, through: :platform_blocks diff --git a/app/models/better_together/sitemap.rb b/app/models/better_together/sitemap.rb new file mode 100644 index 000000000..0a10dd2c5 --- /dev/null +++ b/app/models/better_together/sitemap.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module BetterTogether + # Stores the generated sitemap in Active Storage for serving via S3 + class Sitemap < ApplicationRecord + belongs_to :platform + + has_one_attached :file + + validates :platform_id, uniqueness: true + + def self.current(platform) + find_or_create_by!(platform: platform) + end + end +end diff --git a/app/views/layouts/better_together/application.html.erb b/app/views/layouts/better_together/application.html.erb index 8e9bb045e..dc4f93763 100644 --- a/app/views/layouts/better_together/application.html.erb +++ b/app/views/layouts/better_together/application.html.erb @@ -15,6 +15,7 @@ <%= csrf_meta_tags %> <%= csp_meta_tag %> + diff --git a/better_together.gemspec b/better_together.gemspec index 4c688a404..60322ab9f 100644 --- a/better_together.gemspec +++ b/better_together.gemspec @@ -66,6 +66,7 @@ Gem::Specification.new do |spec| spec.add_dependency 'rswag', '>= 2.3.1', '< 2.17.0' spec.add_dependency 'ruby-openai' spec.add_dependency 'simple_calendar' + spec.add_dependency 'sitemap_generator' spec.add_dependency 'sprockets-rails' spec.add_dependency 'stackprof' spec.add_dependency 'stimulus-rails', '~> 1.3' diff --git a/config/routes.rb b/config/routes.rb index 9815b3f87..14ca35b26 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -2,7 +2,9 @@ require 'sidekiq/web' -BetterTogether::Engine.routes.draw do # rubocop:todo Metrics/BlockLength + BetterTogether::Engine.routes.draw do # rubocop:todo Metrics/BlockLength + get '/sitemap.xml.gz', to: 'sitemaps#show', as: :sitemap + scope ':locale', # rubocop:todo Metrics/BlockLength locale: /#{I18n.available_locales.join('|')}/ do # bt base path diff --git a/config/sitemap.rb b/config/sitemap.rb new file mode 100644 index 000000000..a6548b460 --- /dev/null +++ b/config/sitemap.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +SitemapGenerator::Sitemap.default_host = + "#{ENV.fetch('APP_PROTOCOL', 'http')}://#{ENV.fetch('APP_HOST', 'localhost:3000')}" + +helpers = BetterTogether::Engine.routes.url_helpers + +SitemapGenerator::Sitemap.create do + add helpers.home_page_path(locale: I18n.default_locale) + + add helpers.communities_path(locale: I18n.default_locale) + BetterTogether::Community.find_each do |community| + add helpers.community_path(community, locale: I18n.default_locale), lastmod: community.updated_at + end + + add helpers.conversations_path(locale: I18n.default_locale) + BetterTogether::Conversation.find_each do |conversation| + add helpers.conversation_path(conversation, locale: I18n.default_locale), lastmod: conversation.updated_at + end + + add helpers.posts_path(locale: I18n.default_locale) + BetterTogether::Post.published.find_each do |post| + add helpers.post_path(post, locale: I18n.default_locale), lastmod: post.updated_at + end + + add helpers.events_path(locale: I18n.default_locale) + BetterTogether::Event.find_each do |event| + add helpers.event_path(event, locale: I18n.default_locale), lastmod: event.updated_at + end + + BetterTogether::Page.published.privacy_public.find_each do |page| + add helpers.render_page_path(path: page.slug, locale: I18n.default_locale), lastmod: page.updated_at + end +end diff --git a/db/migrate/20250821120000_create_better_together_sitemaps.rb b/db/migrate/20250821120000_create_better_together_sitemaps.rb new file mode 100644 index 000000000..bc861234a --- /dev/null +++ b/db/migrate/20250821120000_create_better_together_sitemaps.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateBetterTogetherSitemaps < ActiveRecord::Migration[7.1] + def change + create_bt_table :sitemaps do |t| + t.bt_references :platform, + null: false, + index: { unique: true, name: 'unique_sitemaps_platform' } + end + end +end diff --git a/lib/tasks/sitemap.rake b/lib/tasks/sitemap.rake new file mode 100644 index 000000000..c17d13b73 --- /dev/null +++ b/lib/tasks/sitemap.rake @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +namespace :sitemap do + desc 'Generate sitemap and upload to Active Storage' + task refresh: :environment do + require 'sitemap_generator' + + SitemapGenerator::Sitemap.public_path = Rails.root.join('tmp') + SitemapGenerator::Sitemap.sitemaps_path = '' + + load Rails.root.join('config/sitemap.rb') + + file_path = Rails.root.join('tmp', 'sitemap.xml.gz') + platform = BetterTogether::Platform.find_by!(host: true) + BetterTogether::Sitemap.current(platform).file.attach( + io: File.open(file_path), + filename: 'sitemap.xml.gz', + content_type: 'application/gzip' + ) + end +end + +begin + Rake::Task['assets:precompile'].enhance do + Rake::Task['sitemap:refresh'].invoke + end +rescue RuntimeError + # assets:precompile may not be defined in some environments +end diff --git a/spec/dummy/db/schema.rb b/spec/dummy/db/schema.rb index fd3e16719..6a22fc671 100644 --- a/spec/dummy/db/schema.rb +++ b/spec/dummy/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2025_08_12_143319) do +ActiveRecord::Schema[7.1].define(version: 2025_08_21_120000) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -986,6 +986,14 @@ t.index ["reporter_id"], name: "index_better_together_reports_on_reporter_id" end + create_table "better_together_sitemaps", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| + t.integer "lock_version", default: 0, null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.uuid "platform_id", null: false + t.index ["platform_id"], name: "unique_sitemaps_platform", unique: true + end + create_table "better_together_resource_permissions", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| t.integer "lock_version", default: 0, null: false t.datetime "created_at", null: false @@ -1290,6 +1298,7 @@ add_foreign_key "better_together_platforms", "better_together_communities", column: "community_id" add_foreign_key "better_together_posts", "better_together_people", column: "creator_id" add_foreign_key "better_together_reports", "better_together_people", column: "reporter_id" + add_foreign_key "better_together_sitemaps", "better_together_platforms", column: "platform_id" add_foreign_key "better_together_role_resource_permissions", "better_together_resource_permissions", column: "resource_permission_id" add_foreign_key "better_together_role_resource_permissions", "better_together_roles", column: "role_id" add_foreign_key "better_together_social_media_accounts", "better_together_contact_details", column: "contact_detail_id" diff --git a/spec/jobs/better_together/sitemap_refresh_job_spec.rb b/spec/jobs/better_together/sitemap_refresh_job_spec.rb new file mode 100644 index 000000000..e58202d98 --- /dev/null +++ b/spec/jobs/better_together/sitemap_refresh_job_spec.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'rails_helper' +require 'zlib' + +RSpec.describe BetterTogether::SitemapRefreshJob, type: :job do + it 'generates and attaches a sitemap' do + host_platform = create(:platform, :host) + BetterTogether::Sitemap.destroy_all + + described_class.new.perform + + expect(BetterTogether::Sitemap.current(host_platform).file).to be_attached + end + + it 'includes only public pages in the sitemap' do + host_platform = create(:platform, :host) + public_page = create(:page, privacy: 'public', slug: 'public-page') + private_page = create(:page, privacy: 'private', slug: 'private-page') + BetterTogether::Sitemap.destroy_all + + described_class.perform_now + + data = BetterTogether::Sitemap.current(host_platform).file.download + xml = Zlib::GzipReader.new(StringIO.new(data)).read + + expect(xml).to include(public_page.slug) + expect(xml).not_to include(private_page.slug) + end +end diff --git a/spec/requests/better_together/sitemaps_spec.rb b/spec/requests/better_together/sitemaps_spec.rb new file mode 100644 index 000000000..fe4401da2 --- /dev/null +++ b/spec/requests/better_together/sitemaps_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe 'Sitemap', type: :request do + include BetterTogether::Engine.routes.url_helpers + include BetterTogether::DeviseSessionHelpers + + let!(:host_platform) { configure_host_platform } + + before do + host! 'www.example.com' + Rails.application.routes.default_url_options[:host] = 'www.example.com' + end + + describe 'GET /sitemap.xml.gz' do + context 'when a sitemap is attached' do + it 'redirects to the file' do + sitemap = BetterTogether::Sitemap.current(host_platform) + sitemap.file.attach(io: StringIO.new('test'), filename: 'sitemap.xml.gz', content_type: 'application/gzip') + + get sitemap_path + + expect(response).to redirect_to(sitemap.file.url) + end + end + + context 'when no sitemap exists' do + it 'returns not found' do + BetterTogether::Sitemap.current(host_platform).file.detach + + get sitemap_path + + expect(response).to have_http_status(:not_found) + end + end + end +end