Skip to content

Commit 2fc2624

Browse files
Merge pull request #140 from doctolib/TT-23249_increment_lock_timeout
Tt 23249 increment lock_timeout gradually to reduce failing migrations
2 parents be9e6f8 + 43f730a commit 2fc2624

File tree

4 files changed

+132
-36
lines changed

4 files changed

+132
-36
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,10 @@ SafePgMigrations.config.safe_timeout = 5.seconds # Statement timeout used for al
329329

330330
SafePgMigrations.config.lock_timeout = nil # Lock timeout used for all DDL operations except for CREATE / DROP INDEX. If not set, safe_timeout reduced by 1% will be used, to ensure that the lock timeout is raised before the statement timeout
331331

332+
SafePgMigrations.config.increase_lock_timeout_on_retry = false # Activates the lock timeout increase on retry when set to true. See max_lock_timeout_for_retry for more information.
333+
334+
SafePgMigrations.config.max_lock_timeout_for_retry = 1.second # Max lock timeout for the retries for all DDL operations except for CREATE / DROP INDEX. Each retry will increase the lock_timeout (if increase_lock_timeout_on_retry option is set to true) by (max_lock_timeout_for_retry - lock_timeout) / (max_tries - 1), so that the last try runs with max_lock_timeout_for_retry
335+
332336
SafePgMigrations.config.blocking_activity_logger_verbose = true # Outputs the raw blocking queries on timeout. When false, outputs information about the lock instead
333337

334338
SafePgMigrations.config.sensitive_logger = nil # When given, sensitive data will be sent to this logger instead of the standard output. Must implement method `info`.

lib/safe-pg-migrations/configuration.rb

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,66 @@
55
module SafePgMigrations
66
class Configuration
77
attr_accessor(*%i[
8+
backfill_batch_size
9+
backfill_pause
810
blocking_activity_logger_margin
911
blocking_activity_logger_verbose
1012
default_value_backfill_threshold
11-
backfill_batch_size
12-
backfill_pause
13-
retry_delay
13+
increase_lock_timeout_on_retry
1414
max_tries
15+
retry_delay
1516
sensitive_logger
1617
])
17-
attr_reader :lock_timeout, :safe_timeout
18+
attr_reader :lock_timeout, :safe_timeout, :max_lock_timeout_for_retry
1819

1920
def initialize
20-
self.default_value_backfill_threshold = nil
21-
self.safe_timeout = 5.seconds
22-
self.lock_timeout = nil
23-
self.blocking_activity_logger_margin = 1.second
24-
self.blocking_activity_logger_verbose = true
2521
self.backfill_batch_size = 100_000
2622
self.backfill_pause = 0.5.second
27-
self.retry_delay = 1.minute
23+
self.blocking_activity_logger_margin = 1.second
24+
self.blocking_activity_logger_verbose = true
25+
self.default_value_backfill_threshold = nil
26+
self.increase_lock_timeout_on_retry = false
27+
self.lock_timeout = nil
28+
self.max_lock_timeout_for_retry = 1.second
2829
self.max_tries = 5
30+
self.retry_delay = 1.minute
31+
self.safe_timeout = 5.seconds
2932
self.sensitive_logger = nil
3033
end
3134

3235
def lock_timeout=(value)
3336
raise 'Setting lock timeout to 0 disables the lock timeout and is dangerous' if value == 0.seconds
3437

35-
unless value.nil? || value < safe_timeout
36-
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than safe timeout (#{safe_timeout})"
38+
unless value.nil? || (value < safe_timeout && value <= max_lock_timeout_for_retry)
39+
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than the safe timeout (#{safe_timeout}) or the
40+
max lock timeout for retry (#{max_lock_timeout_for_retry})"
3741
end
3842

3943
@lock_timeout = value
4044
end
4145

4246
def safe_timeout=(value)
43-
raise 'Setting safe timeout to 0 disables the safe timeout and is dangerous' unless value
47+
unless value && value > 0.seconds
48+
raise 'Setting safe timeout to 0 or nil disables the safe timeout and is dangerous'
49+
end
4450

45-
unless lock_timeout.nil? || value > lock_timeout
46-
raise ArgumentError, "Safe timeout (#{value}) cannot be less than lock timeout (#{lock_timeout})"
51+
unless lock_timeout.nil? || (value > lock_timeout && value >= max_lock_timeout_for_retry)
52+
raise ArgumentError, "Safe timeout (#{value}) cannot be lower than the lock timeout (#{lock_timeout}) or the
53+
max lock timeout for retry (#{max_lock_timeout_for_retry})"
4754
end
4855

4956
@safe_timeout = value
5057
end
5158

59+
def max_lock_timeout_for_retry=(value)
60+
unless lock_timeout.nil? || (value >= lock_timeout && value <= safe_timeout)
61+
raise ArgumentError, "Max lock timeout for retry (#{value}) cannot be lower than the lock timeout
62+
(#{lock_timeout}) and greater than the safe timeout (#{safe_timeout})"
63+
end
64+
65+
@max_lock_timeout_for_retry = value
66+
end
67+
5268
def pg_statement_timeout
5369
pg_duration safe_timeout
5470
end

lib/safe-pg-migrations/plugins/statement_retrier.rb

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,46 @@ module StatementRetrier
1313
private
1414

1515
def retry_if_lock_timeout
16-
remaining_tries = SafePgMigrations.config.max_tries
16+
initial_lock_timeout = SafePgMigrations.config.lock_timeout
17+
number_of_retries = 0
1718
begin
18-
remaining_tries -= 1
19+
number_of_retries += 1
1920
yield
2021
rescue ActiveRecord::LockWaitTimeout
21-
raise if transaction_open? # Retrying is useless if we're inside a transaction.
22-
raise unless remaining_tries > 0
22+
# Retrying is useless if we're inside a transaction.
23+
if transaction_open? || number_of_retries >= SafePgMigrations.config.max_tries
24+
SafePgMigrations.config.lock_timeout = initial_lock_timeout
25+
raise
26+
end
2327

2428
retry_delay = SafePgMigrations.config.retry_delay
2529
Helpers::Logger.say "Retrying in #{retry_delay} seconds...", sub_item: true
30+
31+
if SafePgMigrations.config.increase_lock_timeout_on_retry && !SafePgMigrations.config.lock_timeout.nil?
32+
increase_lock_timeout
33+
end
34+
2635
sleep retry_delay
2736
Helpers::Logger.say 'Retrying now.', sub_item: true
2837
retry
2938
end
3039
end
40+
41+
def increase_lock_timeout
42+
Helpers::Logger.say " Increasing the lock timeout... Currently set to #{SafePgMigrations.config.lock_timeout}",
43+
sub_item: true
44+
SafePgMigrations.config.lock_timeout = (SafePgMigrations.config.lock_timeout + lock_timeout_step)
45+
unless SafePgMigrations.config.lock_timeout < SafePgMigrations.config.max_lock_timeout_for_retry
46+
SafePgMigrations.config.lock_timeout = SafePgMigrations.config.max_lock_timeout_for_retry
47+
end
48+
Helpers::Logger.say " Lock timeout is now set to #{SafePgMigrations.config.lock_timeout}", sub_item: true
49+
end
50+
51+
def lock_timeout_step
52+
max_lock_timeout_for_retry = SafePgMigrations.config.max_lock_timeout_for_retry
53+
lock_timeout = SafePgMigrations.config.lock_timeout
54+
max_tries = SafePgMigrations.config.max_tries
55+
@lock_timeout_step ||= (max_lock_timeout_for_retry - lock_timeout) / (max_tries - 1)
56+
end
3157
end
3258
end

test/statement_retrier_test.rb

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,53 @@
33
require 'test_helper'
44

55
class StatementRetrierTest < Minitest::Test
6+
def test_lock_timeout_increase_on_retry
7+
SafePgMigrations.config.lock_timeout = 0.1.seconds
8+
SafePgMigrations.config.increase_lock_timeout_on_retry = true
9+
10+
calls = calls_for_lock_timeout_migration
11+
12+
assert_equal [
13+
' -> Retrying in 60 seconds...',
14+
' -> Increasing the lock timeout... Currently set to 0.1',
15+
' -> Lock timeout is now set to 0.325',
16+
' -> Retrying now.',
17+
' -> Retrying in 60 seconds...',
18+
' -> Increasing the lock timeout... Currently set to 0.325',
19+
' -> Lock timeout is now set to 0.55',
20+
' -> Retrying now.',
21+
' -> Retrying in 60 seconds...',
22+
' -> Increasing the lock timeout... Currently set to 0.55',
23+
' -> Lock timeout is now set to 0.775',
24+
' -> Retrying now.',
25+
' -> Retrying in 60 seconds...',
26+
' -> Increasing the lock timeout... Currently set to 0.775',
27+
' -> Lock timeout is now set to 1',
28+
' -> Retrying now.',
29+
], calls[1..].map(&:first)
30+
end
31+
32+
def test_no_lock_timeout_increase_on_retry_if_disabled
33+
SafePgMigrations.config.lock_timeout = 0.1.seconds
34+
SafePgMigrations.config.increase_lock_timeout_on_retry = false
35+
36+
calls = calls_for_lock_timeout_migration
37+
38+
assert_equal [
39+
' -> Retrying in 60 seconds...',
40+
' -> Retrying now.',
41+
' -> Retrying in 60 seconds...',
42+
' -> Retrying now.',
43+
' -> Retrying in 60 seconds...',
44+
' -> Retrying now.',
45+
' -> Retrying in 60 seconds...',
46+
' -> Retrying now.',
47+
], calls[1..].map(&:first)
48+
end
49+
650
def test_retry_if_lock_timeout
7-
@migration =
8-
Class.new(ActiveRecord::Migration::Current) do
9-
def up
10-
connection.send(:retry_if_lock_timeout) do
11-
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
12-
end
13-
end
14-
end.new
51+
calls = calls_for_lock_timeout_migration
1552

16-
@connection.expects(:sleep).times(4)
17-
calls =
18-
record_calls(@migration, :write) do
19-
run_migration
20-
flunk 'run_migration should raise'
21-
rescue StandardError => e
22-
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
23-
assert_includes e.cause.message, 'canceling statement due to lock timeout'
24-
end
2553
assert_equal [
2654
' -> Retrying in 60 seconds...',
2755
' -> Retrying now.',
@@ -48,4 +76,26 @@ def test_statement_retry
4876
' -> Retrying now.',
4977
], calls[7..9]
5078
end
79+
80+
private
81+
82+
def calls_for_lock_timeout_migration
83+
@migration = Class.new(ActiveRecord::Migration::Current) do
84+
def up
85+
connection.send(:retry_if_lock_timeout) do
86+
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
87+
end
88+
end
89+
end.new
90+
91+
@connection.expects(:sleep).times(4)
92+
93+
record_calls(@migration, :write) do
94+
run_migration
95+
flunk 'run_migration should raise'
96+
rescue StandardError => e
97+
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
98+
assert_includes e.cause.message, 'canceling statement due to lock timeout'
99+
end
100+
end
51101
end

0 commit comments

Comments
 (0)