Skip to content

Commit bbc93d6

Browse files
committed
fix(schema): avoid deadlock on reload_schema error
Ensure reload_schema always clears reload_in_progress flag and broadcasts condition variable even if schema reload fails or times out. Previously, an early return on error left reload_in_progress set to true, causing subsequent crud.schema() calls to wait until timeout indefinitely. Add integration test reproducing the issue and verifying that schema reload can be retried successfully after a failure. Fixes #479
1 parent a84e19f commit bbc93d6

File tree

2 files changed

+73
-10
lines changed

2 files changed

+73
-10
lines changed

crud/common/schema.lua

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,28 +58,38 @@ function schema.reload_schema(vshard_router)
5858
local replicasets = vshard_router:routeall()
5959
local vshard_router_name = vshard_router.name
6060

61+
if reload_schema_cond[vshard_router_name] == nil then
62+
reload_schema_cond[vshard_router_name] = fiber.cond()
63+
end
64+
6165
if reload_in_progress[vshard_router_name] == true then
6266
if not reload_schema_cond[vshard_router_name]:wait(const.RELOAD_SCHEMA_TIMEOUT) then
6367
return nil, ReloadSchemaError:new('Waiting for schema to be reloaded is timed out')
6468
end
69+
return true
70+
end
71+
72+
reload_in_progress[vshard_router_name] = true
73+
74+
local ok, err
75+
local p_ok, p_res1, p_res2 = pcall(call_reload_schema, replicasets)
76+
if p_ok then
77+
ok, err = p_res1, p_res2
6578
else
66-
reload_in_progress[vshard_router_name] = true
67-
if reload_schema_cond[vshard_router_name] == nil then
68-
reload_schema_cond[vshard_router_name] = fiber.cond()
69-
end
79+
ok, err = nil, p_res1
80+
end
7081

71-
local ok, err = call_reload_schema(replicasets)
72-
if not ok then
73-
return nil, err
74-
end
82+
reload_in_progress[vshard_router_name] = false
83+
reload_schema_cond[vshard_router_name]:broadcast()
7584

76-
reload_schema_cond[vshard_router_name]:broadcast()
77-
reload_in_progress[vshard_router_name] = false
85+
if not ok then
86+
return nil, err
7887
end
7988

8089
return true
8190
end
8291

92+
8393
-- schema.wrap_func_reload calls func with specified arguments.
8494
-- func should return `res, err, need_reload`
8595
-- If function returned error and `need_reload` is true,
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
local t = require('luatest')
2+
local helpers = require('test.helper')
3+
4+
local pgroup = t.group('schema_reload_deadlock', helpers.backend_matrix({
5+
{ engine = 'memtx' },
6+
}))
7+
8+
pgroup.before_all(function(g)
9+
helpers.start_default_cluster(g, 'srv_select')
10+
end)
11+
12+
pgroup.after_all(function(g)
13+
helpers.stop_cluster(g.cluster, g.params.backend)
14+
end)
15+
16+
pgroup.before_each(function(g)
17+
g._old_reload_schema_timeout = g.router:exec(function(new_timeout)
18+
local const = require('crud.common.const')
19+
local old = const.RELOAD_SCHEMA_TIMEOUT
20+
const.RELOAD_SCHEMA_TIMEOUT = new_timeout
21+
return old
22+
end, { 0.2 })
23+
end)
24+
25+
pgroup.after_each(function(g)
26+
if g._old_reload_schema_timeout ~= nil then
27+
g.router:exec(function(prev_timeout)
28+
local const = require('crud.common.const')
29+
const.RELOAD_SCHEMA_TIMEOUT = prev_timeout
30+
end, { g._old_reload_schema_timeout })
31+
32+
g._old_reload_schema_timeout = nil
33+
end
34+
end)
35+
36+
pgroup.test_schema_reload_in_progress_is_cleared_after_error = function(g)
37+
local _, err = g.router:call('crud.schema')
38+
t.assert_equals(err, nil)
39+
40+
t.assert_is_not(g.cluster:server('s2-master'), nil)
41+
g.cluster:server('s2-master'):stop()
42+
43+
local _, err1 = g.router:call('crud.schema')
44+
t.assert_is_not(err1, nil)
45+
t.assert_str_contains(err1.err, 'timed out')
46+
47+
g.cluster:server('s2-master'):start()
48+
49+
t.helpers.retrying({ timeout = 30, delay = 0.1 }, function()
50+
local _, err2 = g.router:call('crud.schema')
51+
t.assert_equals(err2, nil)
52+
end)
53+
end

0 commit comments

Comments
 (0)