Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions test/rebalancer/rebalancer.result
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,13 @@ _bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.RECEIVING}})
---
- [150, 'receiving']
...
wait_rebalancer_state("Some buckets are not active", test_run)
---
...
-- We should not check the certain status of buckets (e.g. receiving) because
-- in rare cases we accidentally can get the wrong one. For example we wait for
-- "receiving" status, but get "garbage" due to some previous rebalancer error.
wait_rebalancer_state('Error during downloading rebalancer states:.*' .. \
'REBALANCER_INVALID_STATE', test_run) \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit message still mentions BUCKET_INVALID_STATE.

_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.ACTIVE}})
---
- [150, 'active']
...
vshard.storage.sync()
---
Expand Down
6 changes: 5 additions & 1 deletion test/rebalancer/rebalancer.test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ util.map_bucket_protection(test_run, {REPLICASET_1}, false)
test_run:switch('box_1_a')
vshard.storage.rebalancer_enable()
_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.RECEIVING}})
wait_rebalancer_state("Some buckets are not active", test_run)
-- We should not check the certain status of buckets (e.g. receiving) because
-- in rare cases we accidentally can get the wrong one. For example we wait for
-- "receiving" status, but get "garbage" due to some previous rebalancer error.
wait_rebalancer_state('Error during downloading rebalancer states:.*' .. \
'REBALANCER_INVALID_STATE', test_run) \
_bucket:update({150}, {{'=', 2, vshard.consts.BUCKET.ACTIVE}})
vshard.storage.sync()

Expand Down
16 changes: 13 additions & 3 deletions test/unit/rebalancer.result
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,11 @@ build_routes(replicasets)
vshard.storage.internal.is_master = true
---
...
get_state = vshard.storage._rebalancer_request_state
---
...
get_state = function() \
local res, err = vshard.storage._rebalancer_request_state() \
if res == nil then err.trace = nil end \
return res, err \
end \
_bucket = box.schema.create_space('_bucket')
---
...
Expand All @@ -318,13 +320,21 @@ get_state()
---
- bucket_active_count: 2
bucket_pinned_count: 0
- null
...
_bucket:replace{1, consts.BUCKET.RECEIVING}
---
- [1, 'receiving']
...
get_state()
---
- null
- buckets_state: receiving
code: 42
replica_id: _
type: ShardingError
message: Replica _ has receiving buckets during rebalancing
name: REBALANCER_INVALID_STATE
...
vshard.storage.internal.is_master = false
---
Expand Down
6 changes: 5 additions & 1 deletion test/unit/rebalancer.test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ build_routes(replicasets)
-- Test rebalancer local state.
--
vshard.storage.internal.is_master = true
get_state = vshard.storage._rebalancer_request_state
get_state = function() \
local res, err = vshard.storage._rebalancer_request_state() \
if res == nil then err.trace = nil end \
return res, err \
end \
_bucket = box.schema.create_space('_bucket')
pk = _bucket:create_index('pk')
status = _bucket:create_index('status', {parts = {{2, 'string'}}, unique = false})
Expand Down
5 changes: 5 additions & 0 deletions vshard/error.lua
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,11 @@ local error_message_template = {
msg = 'Mismatch server name: expected "%s", but got "%s"',
args = {'expected_name', 'actual_name'},
},
[42] = {
name = 'REBALANCER_INVALID_STATE',
msg = 'Replica %s has %s buckets during rebalancing',
args = {'replica_id', 'buckets_state'}
}
}

--
Expand Down
26 changes: 11 additions & 15 deletions vshard/storage/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2857,12 +2857,10 @@ local function rebalancer_service_f(service, limiter)
return
end
if not status or replicasets == nil then
local err = status and total_bucket_active_count or replicasets
if err then
limiter:log_error(err, service:set_status_error(
'Error during downloading rebalancer states: %s', err))
end
log.info('Some buckets are not active, retry rebalancing later')
local err = total_bucket_active_count
limiter:log_error(err, service:set_status_error(
'Error during downloading rebalancer states: %s, ' ..
'retry rebalancing later', err))
service:set_activity('idling')
lfiber.testcancel()
lfiber.sleep(consts.REBALANCER_WORK_INTERVAL)
Expand Down Expand Up @@ -2951,18 +2949,16 @@ local function rebalancer_request_state()
return nil, err
end
if not M.is_rebalancer_active or rebalancing_is_in_progress() then
return
return nil, lerror.make('Rebalancer is not active or is in progress')
end
local _bucket = box.space._bucket
local status_index = _bucket.index.status
if #status_index:select({BSENDING}, {limit = 1}) > 0 then
return
end
if #status_index:select({BRECEIVING}, {limit = 1}) > 0 then
return
end
if #status_index:select({BGARBAGE}, {limit = 1}) > 0 then
return
local repl_id = M.this_replica and M.this_replica.id or '_'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can M.this_replica or its .id field be nil?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

M.this_replica or its id can be nil in unit tests (e.g. unit/rebalancer) where we don't initialize replica object

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you then make these tests have a proper ID? If this is east to do, I would say this is preferable to making the production code have special checks for tests.

for _, status in pairs({BSENDING, BRECEIVING, BGARBAGE}) do
if #status_index:select({status}, {limit = 1}) > 0 then
return nil, lerror.vshard(lerror.code.REBALANCER_INVALID_STATE,
repl_id, status)
end
end
return {
bucket_active_count = status_index:count({BACTIVE}),
Expand Down