Skip to content

Commit 02e905b

Browse files
committed
rebalancer: refactoring of rebalancer_request_state
Before this patch the function `rebalancer_request_state` returned only nil in case of errors (e.g. presence of SENDING, RECEIVING, GARBAGE buckets, not active rebalancer). This makes it inconsistent compared to other rebalancer functions. Now, in case of errors we return `(nil, err)` instead of `nil`. It can help us to propagate a meaningful error in rebalancer service. In addition we introduce a new vshard error for cases when the storage has some non-active buckets during rebalancing - `BUCKET_INVALID_STATE`. Also we remove "Some buckets are not active" log from `rebalancer_service_f` because a meaningful error about non-active buckets is already returned from `rebalancer_download_states`. NO_TEST=refactoring NO_DOC=refactoring
1 parent 06c15b0 commit 02e905b

File tree

6 files changed

+45
-19
lines changed

6 files changed

+45
-19
lines changed

test/rebalancer/rebalancer.result

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,13 @@ _bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.RECEIVING}})
318318
---
319319
- [150, 'receiving']
320320
...
321-
wait_rebalancer_state("Some buckets are not active", test_run)
322-
---
323-
...
321+
-- We should not check the certain status of buckets (e.g. receiving) because
322+
-- in rare cases we accidentally can get the wrong one. For example we wait for
323+
-- "receiving" status, but get "garbage" due to some previous rebalancer error.
324+
wait_rebalancer_state('Error during downloading rebalancer states:.*' .. \
325+
'BUCKET_INVALID_STATE', test_run) \
324326
_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.ACTIVE}})
325327
---
326-
- [150, 'active']
327328
...
328329
vshard.storage.sync()
329330
---

test/rebalancer/rebalancer.test.lua

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,11 @@ util.map_bucket_protection(test_run, {REPLICASET_1}, false)
156156
test_run:switch('box_1_a')
157157
vshard.storage.rebalancer_enable()
158158
_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.RECEIVING}})
159-
wait_rebalancer_state("Some buckets are not active", test_run)
159+
-- We should not check the certain status of buckets (e.g. receiving) because
160+
-- in rare cases we accidentally can get the wrong one. For example we wait for
161+
-- "receiving" status, but get "garbage" due to some previous rebalancer error.
162+
wait_rebalancer_state('Error during downloading rebalancer states:.*' .. \
163+
'BUCKET_INVALID_STATE', test_run) \
160164
_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.ACTIVE}})
161165
vshard.storage.sync()
162166

test/unit/rebalancer.result

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,9 +290,11 @@ build_routes(replicasets)
290290
vshard.storage.internal.is_master = true
291291
---
292292
...
293-
get_state = vshard.storage._rebalancer_request_state
294-
---
295-
...
293+
get_state = function() \
294+
local res, err = vshard.storage._rebalancer_request_state() \
295+
if res == nil then err.trace = nil end \
296+
return res, err \
297+
end \
296298
_bucket = box.schema.create_space('_bucket')
297299
---
298300
...
@@ -318,13 +320,21 @@ get_state()
318320
---
319321
- bucket_active_count: 2
320322
bucket_pinned_count: 0
323+
- null
321324
...
322325
_bucket:replace{1, consts.BUCKET.RECEIVING}
323326
---
324327
- [1, 'receiving']
325328
...
326329
get_state()
327330
---
331+
- null
332+
- buckets_state: receiving
333+
code: 42
334+
replica_id: _
335+
type: ShardingError
336+
message: Replica _ has receiving buckets
337+
name: BUCKET_INVALID_STATE
328338
...
329339
vshard.storage.internal.is_master = false
330340
---

test/unit/rebalancer.test.lua

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,11 @@ build_routes(replicasets)
7676
-- Test rebalancer local state.
7777
--
7878
vshard.storage.internal.is_master = true
79-
get_state = vshard.storage._rebalancer_request_state
79+
get_state = function() \
80+
local res, err = vshard.storage._rebalancer_request_state() \
81+
if res == nil then err.trace = nil end \
82+
return res, err \
83+
end \
8084
_bucket = box.schema.create_space('_bucket')
8185
pk = _bucket:create_index('pk')
8286
status = _bucket:create_index('status', {parts = {{2, 'string'}}, unique = false})

vshard/error.lua

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ local error_message_template = {
207207
msg = 'Mismatch server name: expected "%s", but got "%s"',
208208
args = {'expected_name', 'actual_name'},
209209
},
210+
[42] = {
211+
name = 'BUCKET_INVALID_STATE',
212+
msg = 'Replica %s has %s buckets',
213+
args = {'replica_id', 'buckets_state'}
214+
}
210215
}
211216

212217
--

vshard/storage/init.lua

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2851,12 +2851,10 @@ local function rebalancer_service_f(service, limiter)
28512851
return
28522852
end
28532853
if not status or replicasets == nil then
2854-
local err = status and total_bucket_active_count or replicasets
2855-
if err then
2856-
limiter:log_error(err, service:set_status_error(
2857-
'Error during downloading rebalancer states: %s', err))
2858-
end
2859-
log.info('Some buckets are not active, retry rebalancing later')
2854+
local err = total_bucket_active_count
2855+
limiter:log_error(err, service:set_status_error(
2856+
'Error during downloading rebalancer states: %s, ' ..
2857+
'retry rebalancing later', err))
28602858
service:set_activity('idling')
28612859
lfiber.testcancel()
28622860
lfiber.sleep(consts.REBALANCER_WORK_INTERVAL)
@@ -2945,18 +2943,22 @@ local function rebalancer_request_state()
29452943
return nil, err
29462944
end
29472945
if not M.is_rebalancer_active or rebalancing_is_in_progress() then
2948-
return
2946+
return nil, lerror.make('Rebalancer is not active or is in progress')
29492947
end
29502948
local _bucket = box.space._bucket
29512949
local status_index = _bucket.index.status
2950+
local repl_id = M.this_replica and M.this_replica.id or '_'
29522951
if #status_index:select({BSENDING}, {limit = 1}) > 0 then
2953-
return
2952+
return nil, lerror.vshard(lerror.code.BUCKET_INVALID_STATE,
2953+
repl_id, 'sending')
29542954
end
29552955
if #status_index:select({BRECEIVING}, {limit = 1}) > 0 then
2956-
return
2956+
return nil, lerror.vshard(lerror.code.BUCKET_INVALID_STATE,
2957+
repl_id, 'receiving')
29572958
end
29582959
if #status_index:select({BGARBAGE}, {limit = 1}) > 0 then
2959-
return
2960+
return nil, lerror.vshard(lerror.code.BUCKET_INVALID_STATE,
2961+
repl_id, 'garbage')
29602962
end
29612963
return {
29622964
bucket_active_count = status_index:count({BACTIVE}),

0 commit comments

Comments
 (0)