Skip to content

Commit 921a625

Browse files
committed
MB-48047:[BP] During config reload try connecting to kv on all
afamily and use the connection which works. When we attempt to reload memcached config, we may have a situation where the address family in ns_server has changed but not propogated to memcached yet. We might fail to communicate with memcached over new address family as memcached may not be listening on the new address family. This bug is especially seen when we go from ipv4-only to ipv6-only and vice versa. Since we cannot know for sure which address family memcached is listening on, due to various error paths that can lead to mismatch in what ns_server perceive the memcached config is and what in fact is applied in memcached, it is simplest to attempt connection using both address family. Backports change related to MB-43196. Reviewed-on: http://review.couchbase.org/c/ns_server/+/142182 Change-Id: Id8fb97470bbd2bdb4573c8a01b16650cc652601b Reviewed-on: http://review.couchbase.org/c/ns_server/+/164953 Tested-by: Abhijeeth Nuthan <[email protected]> Reviewed-by: Timofey Barmin <[email protected]> Well-Formed: Restriction Checker
1 parent 65d77ea commit 921a625

File tree

2 files changed

+48
-21
lines changed

2 files changed

+48
-21
lines changed

src/memcached_config_mgr.erl

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,20 @@ code_change(_OldVsn, State, _Extra) ->
175175
{ok, State}.
176176

177177
apply_changed_memcached_config(DifferentConfig, State) ->
178-
case ns_memcached:config_validate(DifferentConfig) of
178+
%% We might have changed address family in ns_server and we don't want to
179+
%% fail after trying to connect to the address family which might not exist
180+
%% in memcached yet.
181+
%% Since we cannot know for sure which address family memcached is
182+
%% listening on, due to various error paths that can lead to mismatch in
183+
%% what this module perceives the memcached config is and what in fact is
184+
%% applied in memcached, it is simplest to attempt connection using both
185+
%% address family.
186+
AFamiliesToTry = [inet, inet6],
187+
188+
case ns_memcached:config_validate(DifferentConfig, AFamiliesToTry) of
179189
ok ->
180190
?log_debug("New memcached config is hot-reloadable."),
181-
hot_reload_config(DifferentConfig, State, 10, []),
191+
hot_reload_config(DifferentConfig, AFamiliesToTry, State, 10, []),
182192
{noreply, State#state{memcached_config = DifferentConfig}};
183193
{memcached_error, einval, _} ->
184194
?log_debug("Memcached config is not hot-reloadable"),
@@ -221,14 +231,14 @@ changed_keys(BlobBefore, BlobAfter) ->
221231
unknown
222232
end.
223233

224-
hot_reload_config(NewMcdConfig, State, Tries, LastErr) when Tries < 1 ->
234+
hot_reload_config(NewMcdConfig, _, State, Tries, LastErr) when Tries < 1 ->
225235
ale:error(?USER_LOGGER,
226236
"Unable to apply memcached config update that was supposed to "
227237
"succeed. Error: ~p. Giving up. Restart memcached to apply that "
228238
"config change. Updated keys: ~p",
229239
[LastErr, changed_keys(State#state.memcached_config,
230240
NewMcdConfig)]);
231-
hot_reload_config(NewMcdConfig, State, Tries, _LastErr) ->
241+
hot_reload_config(NewMcdConfig, AFamiliesToTry, State, Tries, _LastErr) ->
232242
FilePath = get_memcached_config_path(),
233243
PrevFilePath = FilePath ++ ".prev",
234244

@@ -243,7 +253,7 @@ hot_reload_config(NewMcdConfig, State, Tries, _LastErr) ->
243253
%% we'll be able to retry hot or cold config update
244254
ok = misc:atomic_write_file(FilePath, NewMcdConfig),
245255

246-
case ns_memcached:config_reload() of
256+
case ns_memcached:config_reload(AFamiliesToTry) of
247257
ok ->
248258
delete_prev_config_file(),
249259
ale:info(?USER_LOGGER,
@@ -255,7 +265,8 @@ hot_reload_config(NewMcdConfig, State, Tries, _LastErr) ->
255265
?log_error("Failed to reload memcached config. "
256266
"Will retry. Error: ~p", [Err]),
257267
timer:sleep(1000),
258-
hot_reload_config(NewMcdConfig, State, Tries - 1, Err)
268+
hot_reload_config(NewMcdConfig, AFamiliesToTry, State,
269+
Tries - 1, Err)
259270
end.
260271

261272
get_memcached_config_path() ->

src/ns_memcached.erl

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@
116116
get_vbucket_high_seqno/2,
117117
wait_for_seqno_persistence/3,
118118
get_keys/3,
119-
config_validate/1,
120-
config_reload/0,
119+
config_validate/2,
120+
config_reload/1,
121121
get_failover_log/2,
122122
get_failover_logs/2,
123123
get_collections_uid/1
@@ -1184,13 +1184,27 @@ do_connect(Options) ->
11841184
Port = service_ports:get_port(memcached_dedicated_port, Config),
11851185
User = ns_config:search_node_prop(Config, memcached, admin_user),
11861186
Pass = ns_config:search_node_prop(Config, memcached, admin_pass),
1187-
{ok, Sock} = gen_tcp:connect(misc:localhost(), Port,
1188-
[misc:get_net_family(),
1189-
binary,
1190-
{packet, 0},
1191-
{active, false},
1192-
{recbuf, ?RECBUF},
1193-
{sndbuf, ?SNDBUF}]),
1187+
AFamilies = proplists:get_value(try_afamily, Options,
1188+
[misc:get_net_family()]),
1189+
HelloFeatures = proplists:delete(try_afamily, Options),
1190+
{ok, Sock} = lists:foldl(
1191+
fun (_AFamily, {ok, Socket}) ->
1192+
{ok, Socket};
1193+
(AFamily, Acc) ->
1194+
RV = gen_tcp:connect(misc:localhost(AFamily, []),
1195+
Port,
1196+
[AFamily,
1197+
binary,
1198+
{packet, 0},
1199+
{active, false},
1200+
{recbuf, ?RECBUF},
1201+
{sndbuf, ?SNDBUF}]),
1202+
case RV of
1203+
{ok, S} -> {ok, S};
1204+
_ -> [{AFamily, RV} | Acc]
1205+
end
1206+
end, [], AFamilies),
1207+
11941208
try
11951209
case mc_client_binary:auth(Sock, {<<"PLAIN">>,
11961210
{list_to_binary(User),
@@ -1201,7 +1215,7 @@ do_connect(Options) ->
12011215
"provided password <ud>~s</ud>", [User, Pass]),
12021216
error({auth_failure, Err})
12031217
end,
1204-
Features = mc_client_binary:hello_features(Options),
1218+
Features = mc_client_binary:hello_features(HelloFeatures),
12051219
{ok, Negotiated} = mc_client_binary:hello(Sock, "regular", Features),
12061220
Failed = Features -- Negotiated,
12071221
Failed == [] orelse error({feature_negotiation_failed, Failed}),
@@ -1447,18 +1461,20 @@ do_get_keys(Bucket, NodeVBuckets, Params) ->
14471461
end
14481462
end, NodeVBuckets, ?GET_KEYS_OUTER_TIMEOUT).
14491463

1450-
-spec config_validate(binary()) -> ok | mc_error().
1451-
config_validate(NewConfig) ->
1464+
-spec config_validate(binary(), [inet | inet6]) -> ok | mc_error().
1465+
config_validate(NewConfig, AFamilies) ->
14521466
misc:executing_on_new_process(
14531467
fun () ->
1454-
{ok, Sock} = connect([{retries, 1}]),
1468+
{ok, Sock} = connect([{retries, 1},
1469+
{try_afamily, AFamilies}]),
14551470
mc_client_binary:config_validate(Sock, NewConfig)
14561471
end).
14571472

1458-
config_reload() ->
1473+
config_reload(AFamilies) ->
14591474
misc:executing_on_new_process(
14601475
fun () ->
1461-
{ok, Sock} = connect([{retries, 1}]),
1476+
{ok, Sock} = connect([{retries, 1},
1477+
{try_afamily, AFamilies}]),
14621478
mc_client_binary:config_reload(Sock)
14631479
end).
14641480

0 commit comments

Comments
 (0)