Skip to content

Commit e776055

Browse files
committed
MB-47702: Conditionally kill epmd on settings change and restart
In order to support EnforceTLS (don't listen on unencrypted ports) and the address-family-only feature, it was decided to kill epmd. Both of the above features are only supported from 7.0 onwards. The reason for killing epmd is that it is no longer required as of 6.5 and it is hard to make it listen only on a particular address family. The ports opened by epmd are also unencrypted ports. In order to kill epmd, we need to start it with the relaxed_command_check option. "epmd -kill" is used, since we can only kill epmd if: 1. no node names are registered, and 2. it was started with the relaxed_command_check option. Corresponding Windows change: http://review.couchbase.org/c/voltron/+/159157 Change-Id: I537e68a14b0455de5e4dd15bf40fcc91e76ad742 Reviewed-on: http://review.couchbase.org/c/ns_server/+/158666 Tested-by: Abhijeeth Nuthan <[email protected]> Well-Formed: Build Bot <[email protected]> Reviewed-by: Timofey Barmin <[email protected]>
1 parent e625b97 commit e776055

File tree

4 files changed

+80
-4
lines changed

4 files changed

+80
-4
lines changed

couchbase-server.sh.in

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ export ERL_CRASH_DUMP
7979
ERL_FULLSWEEP_AFTER=512
8080
export ERL_FULLSWEEP_AFTER
8181

82+
ERL_EPMD_RELAXED_COMMAND_CHECK=1
83+
export ERL_EPMD_RELAXED_COMMAND_CHECK
84+
8285
# For some obscure reason erl requires HOME environment variable to be set.
8386
if [ -z "$HOME" ]
8487
then
@@ -143,11 +146,14 @@ _prepare_datadir () {
143146
}
144147

145148
# Start epmd (by briefly booting a short-named erlang node) unless the
# "no_epmd" marker file exists in the data directory.
# Exits the script with status 1 if the erl invocation fails.
_maybe_start_epmd () {
    # The path is quoted so that a data directory containing whitespace or
    # glob characters does not turn the test into a usage error, which the
    # "if" would treat as false and silently skip starting epmd.
    if [ ! -f "$CB_DATA_DIR/no_epmd" ]
    then
        # Initialize distributed erlang on the system (i.e. epmd)
        erl -noshell -setcookie nocookie -sname init -run init stop 2>&1 > /dev/null
        if [ $? -ne 0 ]
        then
            exit 1
        fi
    fi
}
153159

@@ -211,6 +217,7 @@ _start() {
211217
212218
-proto_dist cb \
213219
-epmd_module cb_epmd \
220+
-start_epmd false \
214221
-ssl_dist_optfile $SSL_DIST_OPTFILE \
215222
-setcookie nocookie \
216223
$* \
@@ -236,6 +243,7 @@ _stop() {
236243
inetrc "\"$HOSTS_CFG_FILE\"" \
237244
-proto_dist cb \
238245
-epmd_module cb_epmd \
246+
-start_epmd false \
239247
-ssl_dist_optfile $SSL_DIST_OPTFILE \
240248
-noshell \
241249
-hidden \

pylib/cluster_run_lib.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def erlang_args_for_node(i, ebin_path, extra_args, args_prefix, root_dir):
263263
"-proto_dist", "cb",
264264
"-ssl_dist_optfile", ssloptfile,
265265
"-epmd_module", "cb_epmd",
266+
"-start_epmd", "false",
266267
"-hidden",
267268
"-kernel", "dist_config_file", quote_string_for_erl(cb_dist_config),
268269
"-kernel", "inetrc", f"\"{hosts_file}\"",
@@ -454,6 +455,7 @@ def start_node(node_num):
454455
params['env']['ERL_CRASH_DUMP'] = crash_dump_base + '.babysitter'
455456

456457
params['env']['COUCHBASE_SMALLER_PKEYS'] = '1'
458+
params['env']['ERL_EPMD_RELAXED_COMMAND_CHECK'] = '1'
457459

458460
params['close_fds'] = True
459461
if platform.system() == "Windows":
@@ -477,6 +479,11 @@ def start_node(node_num):
477479
params['stdout'] = subprocess.DEVNULL
478480
params['stderr'] = subprocess.DEVNULL
479481

482+
if not os.path.isfile(abs_path_join(root_dir, 'data', f'n_{node_num}',
483+
'no_epmd')):
484+
subprocess.run(["erl", "-noshell", "-setcookie", "nocookie",
485+
"-sname", "init", "-run", "init", "stop"],
486+
env=params['env'])
480487
pr = subprocess.Popen(args, **params)
481488
if w is not None:
482489
os.close(r)

src/menelaus_event.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ maybe_restart(#state{webconfig = WebConfigOld,
203203
AFROld =:= AFRNew of
204204
true -> State;
205205
false -> {ok, _} = menelaus_web_sup:restart_web_servers(),
206+
netconfig_updater:maybe_kill_epmd(),
206207
State#state{webconfig = WebConfigNew,
207208
disable_non_ssl_ports = DisableNew,
208209
afamily_requirement = AFRNew}

src/netconfig_updater.erl

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
%% API
1414
-export([start_link/0,
15+
maybe_kill_epmd/0,
1516
apply_config/1,
1617
change_external_listeners/2,
1718
ensure_tls_dist_started/1]).
@@ -25,6 +26,8 @@
2526
-include_lib("kernel/include/net_address.hrl").
2627
-include("ns_common.hrl").
2728

29+
-define(CAN_KILL_EPMD, ?get_param(can_kill_epmd, true)).
30+
2831
%%%===================================================================
2932
%%% API
3033
%%%===================================================================
@@ -49,6 +52,10 @@ init([]) ->
4952
ServerName = ?MODULE,
5053
register(ServerName, self()),
5154
ensure_ns_config_settings_in_order(),
55+
%% We choose to kill epmd at startup if required. This is mainly required
56+
%% for windows as for unix systems epmd will not be started because of
57+
%% no_epmd file.
58+
misc:is_windows() andalso maybe_kill_epmd(),
5259
proc_lib:init_ack({ok, self()}),
5360
case misc:consult_marker(update_marker_path()) of
5461
{ok, [Cmd]} ->
@@ -371,6 +378,59 @@ check_nodename_resolvable(Node, AFamily) ->
371378
{error, iolist_to_binary(M)}
372379
end.
373380

381+
%% Returns the path of the epmd executable for the current platform.
epmd_executable() ->
    epmd_executable(misc:is_windows()).

%% Epmd doesn't exist in the bin path for windows, so the erts_bin_path env
%% of ns_server is used to locate it. Crashes with badmatch if that env is
%% not set.
epmd_executable(true) ->
    {ok, ErtsBinDir} = application:get_env(ns_server, erts_bin_path),
    filename:join(ErtsBinDir, "epmd.exe");
epmd_executable(false) ->
    path_config:component_path(bin, "epmd").
392+
393+
kill_epmd() ->
394+
Path = epmd_executable(),
395+
Port = erlang:open_port({spawn_executable, Path},
396+
[stderr_to_stdout, binary,
397+
stream, exit_status, hide,
398+
{args, ["-kill"]}]),
399+
{ExitStatus, Output} = wait_for_exit(Port, []),
400+
case ExitStatus of
401+
0 ->
402+
ok;
403+
_ ->
404+
?log_error("Failed to kill epmd: ~p", [{ExitStatus, Output}]),
405+
error
406+
end.
407+
408+
%% Collects everything Port emits until its exit_status message arrives.
%%
%% Port   - the port (opened with the binary and exit_status options) whose
%%          messages are consumed from the caller's mailbox.
%% Output - string to which the collected data is appended.
%%
%% Returns {ExitStatus, Output ++ CollectedData} where CollectedData is a
%% string. Blocks until the exit_status message is received.
wait_for_exit(Port, Output) ->
    wait_for_exit(Port, Output, []).

%% Chunks are accumulated in reverse and converted once at the end, instead
%% of appending to the growing string on every data message.
wait_for_exit(Port, Output, RevChunks) ->
    receive
        {Port, {data, Data}} ->
            wait_for_exit(Port, Output, [Data | RevChunks]);
        {Port, {exit_status, Status}} ->
            Collected = iolist_to_binary(lists:reverse(RevChunks)),
            {Status, Output ++ binary_to_list(Collected)}
    end.
415+
416+
%% Kills epmd when killing is allowed (can_kill_epmd param), the cluster is
%% at 7.0 compat mode, and either the address-family-only or the
%% disable-non-ssl-ports setting is on. In that case the "no_epmd" marker is
%% created in the data dir before epmd is killed. Otherwise the marker is
%% removed.
maybe_kill_epmd() ->
    Marker = path_config:component_path(data, "no_epmd"),
    ShouldKill =
        ?CAN_KILL_EPMD andalso
        cluster_compat_mode:is_cluster_70() andalso
        (misc:get_afamily_only() orelse misc:disable_non_ssl_ports()),
    maybe_kill_epmd(ShouldKill, Marker).

maybe_kill_epmd(false, Marker) ->
    %% Best effort: the result of the delete is deliberately ignored.
    file:delete(Marker),
    ok;
maybe_kill_epmd(true, Marker) ->
    %% Any exception raised while creating the marker or killing epmd is
    %% logged rather than propagated to the caller.
    try
        misc:create_marker(Marker),
        ?log_info("Killing epmd ..."),
        kill_epmd()
    catch
        Class:Reason:Stack ->
            ?log_error("Exception while killing epmd ~p",
                       [{Class, Reason, Stack}])
    end.
433+
374434
%% This function is needed in two cases:
375435
%% - migration for address family settings to 6.5
376436
%% - allow manual changes in dist_cfg file

0 commit comments

Comments
 (0)