Skip to content

Commit 7662619

Browse files
committed
Allow querying last error that caused lock unhealthiness. Retry test if that last error is retryable.
1 parent f145785 commit 7662619

File tree

3 files changed

+88
-9
lines changed

3 files changed

+88
-9
lines changed

lib/distributed-lock-google-cloud-storage/lock.rb

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def initialize(bucket_name:, path:, instance_identity_prefix: self.class.default
120120
@owner = nil
121121
@metageneration = nil
122122
@refresher_thread = nil
123+
@refresher_error = nil
123124

124125
# The refresher generation is incremented every time we shutdown
125126
# the refresher thread. It allows the refresher thread to know
@@ -315,6 +316,7 @@ def abandon
315316
#
316317
# @return [Boolean]
317318
# @raise [NotLockedError] This lock was not obtained.
319+
# @see #last_refresh_error
318320
def healthy?
319321
@state_mutex.synchronize do
320322
raise NotLockedError, 'Not locked' if !unsynced_locked_according_to_internal_state?
@@ -323,6 +325,8 @@ def healthy?
323325
end
324326

325327
# Checks whether the lock is healthy. See {#healthy?} for the definition of "healthy".
328+
# Use {#last_refresh_error} to query the last error that caused the lock to be declared
329+
# unhealthy.
326330
#
327331
# It only makes sense to call this method after having obtained this lock.
328332
#
@@ -333,6 +337,19 @@ def check_health!
333337
raise LockUnhealthyError, 'Lock is not healthy' if !healthy?
334338
end
335339

340+
# Returns the last error that caused the lock to be declared unhealthy.
341+
#
342+
# Don't use this method to check whether the lock is _currently_ healthy.
343+
# If this lock has ever been unhealthy, then this method returns a non-nil value
344+
# even if the lock is currently healthy.
345+
#
346+
# @return [StandardError, nil]
347+
def last_refresh_error
348+
@state_mutex.synchronize do
349+
@refresher_error
350+
end
351+
end
352+
336353

337354
private
338355

@@ -565,7 +582,17 @@ def refresh_lock(refresher_generation)
565582
end
566583
log_debug { "Done refreshing lock. metageneration=#{file.metageneration}" }
567584
true
585+
568586
rescue => e
587+
@state_mutex.synchronize do
588+
if @refresher_generation != refresher_generation
589+
log_debug { 'Abort refreshing lock' }
590+
return true
591+
end
592+
593+
@refresher_error = e
594+
end
595+
569596
log_error { "Error refreshing lock: #{e}" }
570597
[false, permanent_failure]
571598
end

spec/lock_spec.rb

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ def force_erase_lock_object
8181
expect(lock).not_to be_owned_according_to_server
8282
end
8383

84+
it 'has no refresh error' do
85+
expect(lock.last_refresh_error).to be_nil
86+
end
87+
8488
specify 'checking for health is not possible due to being unlocked' do
8589
expect { lock.healthy? }.to \
8690
raise_error(DistributedLock::GoogleCloudStorage::NotLockedError)
@@ -113,6 +117,7 @@ def force_erase_lock_object
113117
expect(@lock).to be_owned_according_to_internal_state
114118
expect(@lock).to be_owned_according_to_server
115119
expect(@lock).to be_healthy
120+
expect(@lock.last_refresh_error).to be_nil
116121
expect { @lock.check_health! }.not_to raise_error
117122
end
118123

@@ -131,6 +136,7 @@ def force_erase_lock_object
131136
owned_according_to_internal_state: @lock2.owned_according_to_internal_state?,
132137
owned_according_to_server: @lock2.owned_according_to_server?,
133138
healthy: @lock2.healthy?,
139+
last_refresh_error: @lock2.last_refresh_error,
134140
}
135141
end
136142

@@ -155,6 +161,7 @@ def force_erase_lock_object
155161
expect(result[:owned_according_to_internal_state]).to be_truthy
156162
expect(result[:owned_according_to_server]).to be_truthy
157163
expect(result[:healthy]).to be_truthy
164+
expect(result[:last_refresh_error]).to be_nil
158165
end
159166

160167
it 'raises AlreadyLockedError if called twice by the same instance and thread' do
@@ -183,6 +190,7 @@ def force_erase_lock_object
183190
expect(@lock).to be_owned_according_to_internal_state
184191
expect(@lock).to be_owned_according_to_server
185192
expect(@lock).to be_healthy
193+
expect(@lock.last_refresh_error).to be_nil
186194
expect { @lock.check_health! }.not_to raise_error
187195
end
188196

@@ -209,6 +217,7 @@ def force_erase_lock_object
209217
expect(@lock).to be_owned_according_to_internal_state
210218
expect(@lock).to be_owned_according_to_server
211219
expect(@lock).to be_healthy
220+
expect(@lock.last_refresh_error).to be_nil
212221
expect { @lock.check_health! }.not_to raise_error
213222
end
214223

@@ -224,6 +233,7 @@ def force_erase_lock_object
224233
expect(@lock).to be_owned_according_to_internal_state
225234
expect(@lock).to be_owned_according_to_server
226235
expect(@lock).to be_healthy
236+
expect(@lock.last_refresh_error).to be_nil
227237
expect { @lock.check_health! }.not_to raise_error
228238
end
229239

@@ -238,20 +248,25 @@ def force_erase_lock_object
238248
expect(@lock).to be_owned_according_to_internal_state
239249
expect(@lock).to be_owned_according_to_server
240250
expect(@lock).to be_healthy
251+
expect(@lock.last_refresh_error).to be_nil
241252
expect { @lock.check_health! }.not_to raise_error
242253
end
243254
end
244255

245256

246257
describe '#unlock' do
247258
after :each do
248-
@lock.abandon if @lock
259+
if @lock
260+
@lock.abandon
261+
raise @lock.last_refresh_error if exception_to_retry?(@lock.last_refresh_error)
262+
end
249263
end
250264

251265
def lock_and_unlock
252266
@lock.lock(timeout: 0)
253267
deleted = nil
254-
expect { deleted = @lock.unlock }.not_to raise_error
268+
e = catch_non_retryable_exception { deleted = @lock.unlock }
269+
expect(e).to be_nil
255270
deleted
256271
end
257272

@@ -292,8 +307,11 @@ def lock_and_unlock
292307

293308
@lock.lock(timeout: 0)
294309
force_erase_lock_object
310+
295311
deleted = nil
296-
expect { deleted = @lock.unlock }.not_to raise_error
312+
e = catch_non_retryable_exception { deleted = @lock.unlock }
313+
314+
expect(e).to be_nil
297315
expect(deleted).to be_falsey
298316
expect(@lock).not_to be_locked_according_to_internal_state
299317
expect(@lock).not_to be_locked_according_to_server
@@ -326,7 +344,10 @@ def lock_and_unlock
326344
end
327345

328346
after :each do
329-
@lock.abandon if @lock
347+
if @lock
348+
@lock.abandon
349+
raise @lock.last_refresh_error if exception_to_retry?(@lock.last_refresh_error)
350+
end
330351
end
331352

332353
it 'updates the update time' do
@@ -348,12 +369,13 @@ def lock_and_unlock
348369
f.metadata['something'] = '123'
349370
end
350371
expect(file.metageneration).not_to eq(orig_metageneration)
351-
eventually(timeout: 10) do
372+
eventually(timeout: 30) do
352373
!@lock.healthy?
353374
end
354375
expect { @lock.check_health! }.to \
355376
raise_error(DistributedLock::GoogleCloudStorage::LockUnhealthyError)
356377
expect(log_output.string).to include('Lock object has an unexpected metageneration number')
378+
expect(@lock.last_refresh_error).not_to be_nil
357379
end
358380

359381
it 'declares unhealthiness when the lock object is deleted' do
@@ -370,6 +392,7 @@ def lock_and_unlock
370392
expect { @lock.check_health! }.to \
371393
raise_error(DistributedLock::GoogleCloudStorage::LockUnhealthyError)
372394
expect(log_output.string).to include('Lock object has been unexpectedly deleted')
395+
expect(@lock.last_refresh_error).not_to be_nil
373396
end
374397
end
375398
end

spec/spec_helper.rb

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
require 'rspec/retry'
66

77
module Helpers
8+
EXCEPTIONS_TO_RETRY = [
9+
Google::Cloud::ResourceExhaustedError,
10+
Google::Cloud::UnavailableError,
11+
]
12+
813
def self.print_all_thread_backtraces
914
puts '-------- Begin backtrace dump --------'
1015
Thread.list.each do |thr|
@@ -17,6 +22,33 @@ def self.print_all_thread_backtraces
1722
puts '-------- End backtrace dump --------'
1823
end
1924

25+
def exception_to_retry?(ex)
26+
EXCEPTIONS_TO_RETRY.any? do |ex_class|
27+
ex.is_a?(ex_class)
28+
end
29+
end
30+
31+
# Runs the given block.
32+
# If it raises a retryable exception, then pass it through.
33+
# If it raises a non-retryable exception, then returns that exception.
34+
# Otherwise, returns nil.
35+
#
36+
# This is used as an alternative to `expect { block }.not_to raise_error`
37+
# which lets retryable exceptions through so that rspec-retry can retry
38+
# the test.
39+
def catch_non_retryable_exception
40+
begin
41+
yield
42+
nil
43+
rescue => e
44+
if exception_to_retry?(e)
45+
raise e
46+
else
47+
e
48+
end
49+
end
50+
end
51+
2052
def require_envvar(name)
2153
value = ENV[name]
2254
raise ArgumentError, "Required environment variable: #{name}" if value.to_s.empty?
@@ -56,10 +88,7 @@ def consistently(duration:, interval: 0.1)
5688
c.display_try_failure_messages = true
5789
c.default_retry_count = 3
5890
c.default_sleep_interval = 10
59-
c.exceptions_to_retry = [
60-
Google::Cloud::ResourceExhaustedError,
61-
Google::Cloud::UnavailableError
62-
]
91+
c.exceptions_to_retry = Helpers::EXCEPTIONS_TO_RETRY
6392
end
6493

6594
Signal.trap('QUIT') { Helpers.print_all_thread_backtraces }

0 commit comments

Comments
 (0)