Skip to content

Commit fa517ae

Browse files
committed
replace local code with code extracted to BotChallengePage gem
For turnstile protection of some things
1 parent befecad commit fa517ae

File tree

13 files changed

+92
-395
lines changed

13 files changed

+92
-395
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,5 @@ end
268268
gem "barnes"
269269

270270
gem 'equivalent-xml'
271+
272+
gem "bot_challenge_page"

Gemfile.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ GEM
158158
bootstrap4-kaminari-views (1.0.1)
159159
kaminari (>= 0.13)
160160
rails (>= 3.1)
161+
bot_challenge_page (0.2.0)
162+
http (~> 5.2)
163+
rack-attack (~> 6.7)
164+
rails (>= 7.1, < 8.1)
161165
browse-everything (1.5.0)
162166
addressable (~> 2.5)
163167
aws-sdk-s3
@@ -801,6 +805,7 @@ DEPENDENCIES
801805
blacklight_range_limit (~> 9.0.0)
802806
bootsnap (>= 1.4.4)
803807
bootstrap4-kaminari-views
808+
bot_challenge_page
804809
browse-everything (~> 1.5)
805810
browser (~> 6.0)
806811
capybara (>= 2.15)

app/controllers/application_controller.rb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
class ApplicationController < ActionController::Base
2+
# This will only protect CONFIGURED routes, but also could be put on just certain
3+
# controllers; it does not need to be in ApplicationController
4+
before_action do |controller|
5+
BotChallengePage::BotChallengePageController.bot_challenge_enforce_filter(controller)
6+
end
7+
28
# Blacklight tried to add some things to ApplicationController, but
39
# we pretty much only want to use CatalogController from Blacklight, so
410
# we are trying to just do these things there instead
@@ -23,8 +29,6 @@ class ApplicationController < ActionController::Base
2329

2430
around_action :batch_kithe_indexable
2531

26-
before_action { |controller| BotDetectController.bot_detection_enforce_filter(controller) }
27-
2832
def batch_kithe_indexable
2933
Kithe::Indexable.index_with(batching: true) do
3034
yield

app/controllers/bot_detect_controller.rb

Lines changed: 0 additions & 221 deletions
This file was deleted.

app/controllers/downloads_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class DownloadsController < ApplicationController
4040

4141
# protect originals only from bots with bot challenge redirect, no allowed pre-challenge
4242
# rate limit.
43-
before_action(only: :original) { |controller| BotDetectController.bot_detection_enforce_filter(controller, immediate: true) }
43+
before_action(only: :original) { |controller| BotChallengePage::BotChallengePageController.bot_challenge_enforce_filter(controller, immediate: true) }
4444

4545
#GET /downloads/:asset_id
4646
def original

config/application.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class Application < Rails::Application
7070

7171
# code to be executed when launching `rails console`
7272
# https://github.com/rails/rails/blob/cf27cfa18bc3742cfaf732da5a839521d6662785/railties/lib/rails/railtie.rb#L143
73+
74+
require 'rack/attack'
7375
console do
7476
# Disable honeybadger reporting in console. Avoid those annoying SIGHUP errors
7577
# reported for timed out console you left running.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Some explanation at https://sciencehistory.atlassian.net/wiki/spaces/HDC/pages/2645098498/Cloudflare+Turnstile+bot+detection
2+
Rails.application.config.to_prepare do
3+
config = BotChallengePage::BotChallengePageController.bot_challenge_config
4+
5+
# allow rate_limit_count requests in rate_limit_period, before issuing challenge
6+
config.rate_limit_period = 12.hour
7+
config.rate_limit_count = 2 # seriously reduced to see if that helps
8+
9+
# How long a challenge pass is good for
10+
config.session_passed_good_for = 24.hours
11+
12+
config.enabled = ScihistDigicoll::Env.lookup(:cf_turnstile_enabled)
13+
config.cf_turnstile_sitekey = ScihistDigicoll::Env.lookup(:cf_turnstile_sitekey)
14+
config.cf_turnstile_secret_key = ScihistDigicoll::Env.lookup(:cf_turnstile_secret_key)
15+
16+
# any custom collection controllers or other controllers that offer search have to be listed here
17+
# to rate-limit them!
18+
config.rate_limited_locations = [
19+
'/catalog',
20+
'/focus',
21+
# we want to omit `/collections` list page, so we do these by controller
22+
{ controller: "collection_show" },
23+
{ controller: "collection_show_controllers/immigrants_and_innovation_collection" },
24+
{ controller: "collection_show_controllers/oral_history_collection"},
25+
{ controller: "collection_show_controllers/bredig_collection"}
26+
]
27+
28+
config.allow_exempt = ->(controller, config) {
29+
# Exempt any Catalog #facet action that looks like an ajax/fetch request, the redirect
30+
# ain't gonna work there, we just exempt it.
31+
#
32+
# sec-fetch-dest is set to 'empty' by browser on fetch requests, to limit us further;
33+
# sure an attacker could fake it, we don't mind if someone determined can avoid rate-limiting on this one action
34+
( controller.params[:action] == "facet" &&
35+
controller.request.headers["sec-fetch-dest"] == "empty" &&
36+
controller.kind_of?(CatalogController)
37+
) ||
38+
# Exempt requests from the uptime checker that carry the matching Honeybadger-Token header
39+
# https://docs.honeybadger.io/guides/security/
40+
(
41+
ENV['HONEYBADGER_TOKEN'].present? &&
42+
controller.request.headers['Honeybadger-Token'] == ENV['HONEYBADGER_TOKEN']
43+
) ||
44+
# Exempt a collection controller (or sub-class!) with _no query params_, we want to
45+
# let Google and other bots into collection home pages, even though they show search results.
46+
(
47+
controller.kind_of?(CollectionShowController) &&
48+
controller.respond_to?(:has_search_parameters?) &&
49+
!controller.has_search_parameters?
50+
) ||
51+
## exempt PDF original downloads, which are protected with an 'immediate' filter
52+
(
53+
controller.kind_of?(DownloadsController) &&
54+
controller.params[:file_category] == "pdf"
55+
)
56+
}
57+
58+
BotChallengePage::BotChallengePageController.rack_attack_init
59+
end

0 commit comments

Comments
 (0)