Skip to content

Commit 1028501

Browse files
MB-50306: Add cert expiration alerts for trusted certs and node certs

Change-Id: If98d5f47a3feadb574a470c3cec84d7b613ab57e
Reviewed-on: https://review.couchbase.org/c/ns_server/+/168732
Well-Formed: Build Bot <[email protected]>
Reviewed-by: Steve Watanabe <[email protected]>
Tested-by: Timofey Barmin <[email protected]>
1 parent 080c93d commit 1028501

File tree

4 files changed

+92
-13
lines changed

4 files changed

+92
-13
lines changed

src/menelaus_web_alerts_srv.erl

Lines changed: 86 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
2525
terminate/2, code_change/3]).
2626

27-
-export([alert_keys/0, config_upgrade_to_70/1]).
27+
-export([alert_keys/0, config_upgrade_to_70/1, config_upgrade_to_MORPHEUS/1]).
2828

2929
%% @doc Hold client state for any alerts that need to be shown in
3030
%% the browser, is used by menelaus_web to piggy back for a transport
@@ -78,6 +78,10 @@ short_description(time_out_of_sync) ->
7878
"node time not in sync";
7979
short_description(disk_usage_analyzer_stuck) ->
8080
"disks usage worker is stuck and unresponsive";
81+
short_description(cert_expires_soon) ->
82+
"certificate will expire soon";
83+
short_description(cert_expired) ->
84+
"certificate has expired";
8185
short_description(Other) ->
8286
%% this case is needed for tests to work
8387
couch_util:to_list(Other).
@@ -109,7 +113,15 @@ errors(time_out_of_sync) ->
109113
errors(disk_usage_analyzer_stuck) ->
110114
"Disk usage worker is stuck on node \"~s\". Please ensure all mounts are "
111115
"accessible via \"df\" and consider killing any existing \"df\" "
112-
"processes.".
116+
"processes.";
117+
errors(node_cert_expires_soon) ->
118+
"Server certificate for node ~s (subject: '~s') will expire at ~s.";
119+
errors(node_cert_expired) ->
120+
"Server certificate for node ~s (subject: '~s') has expired.";
121+
errors(ca_expires_soon) ->
122+
"Trusted CA certificate with ID=~b (subject: '~s') will expire at ~s.";
123+
errors(ca_expired) ->
124+
"Trusted CA certificate with ID=~b (subject: '~s') has expired.".
113125

114126
%% ------------------------------------------------------------------
115127
%% API Function Definitions
@@ -258,14 +270,31 @@ alert_keys() ->
258270
[ip, disk, overhead, ep_oom_errors, ep_item_commit_failed,
259271
audit_dropped_events, indexer_ram_max_usage,
260272
ep_clock_cas_drift_threshold_exceeded,
261-
communication_issue, time_out_of_sync, disk_usage_analyzer_stuck].
273+
communication_issue, time_out_of_sync, disk_usage_analyzer_stuck,
274+
cert_expires_soon, cert_expired].
262275

263276
config_upgrade_to_70(Config) ->
264277
case ns_config:search(Config, email_alerts) of
265278
false ->
266279
[];
267280
{value, EmailAlerts} ->
268-
config_email_alerts_upgrade_to_70(EmailAlerts)
281+
upgrade_alerts(
282+
EmailAlerts,
283+
[add_proplist_list_elem(alerts, time_out_of_sync, _),
284+
add_proplist_kv(pop_up_alerts, alert_keys(), _)])
285+
end.
286+
287+
config_upgrade_to_MORPHEUS(Config) ->
288+
case ns_config:search(Config, email_alerts) of
289+
false ->
290+
[];
291+
{value, EmailAlerts} ->
292+
upgrade_alerts(
293+
EmailAlerts,
294+
[add_proplist_list_elem(alerts, cert_expired, _),
295+
add_proplist_list_elem(pop_up_alerts, cert_expired, _),
296+
add_proplist_list_elem(alerts, cert_expires_soon, _),
297+
add_proplist_list_elem(pop_up_alerts, cert_expires_soon, _)])
269298
end.
270299

271300
%% ------------------------------------------------------------------
@@ -282,7 +311,7 @@ start_timer() ->
282311
global_checks() ->
283312
[oom, ip, write_fail, overhead, disk, audit_write_fail,
284313
indexer_ram_max_usage, cas_drift_threshold, communication_issue,
285-
time_out_of_sync, disk_usage_analyzer_stuck].
314+
time_out_of_sync, disk_usage_analyzer_stuck, certs].
286315

287316
%% @doc fires off various checks
288317
check_alerts(Opaque, Hist, Stats) ->
@@ -485,6 +514,54 @@ check(time_out_of_sync, Opaque, _History, _Stats) ->
485514
false ->
486515
ok
487516
end,
517+
Opaque;
518+
519+
check(certs, Opaque, _History, _Stats) ->
520+
CAAlerts =
521+
case mb_master:master_node() == node() of
522+
true ->
523+
lists:flatmap(
524+
fun (CAProps) ->
525+
ExpWarnings = ns_server_cert:expiration_warnings(CAProps),
526+
Subject = proplists:get_value(subject, CAProps),
527+
Id = proplists:get_value(id, CAProps),
528+
[{{ca, Id, Subject}, W} || W <- ExpWarnings]
529+
end, ns_server_cert:trusted_CAs(props));
530+
false ->
531+
[]
532+
end,
533+
534+
LocalAlerts =
535+
case ns_config:read_key_fast({node, node(), node_cert}, undefined) of
536+
undefined -> [];
537+
Props ->
538+
lists:map(
539+
fun (W) ->
540+
Subject = proplists:get_value(subject, Props),
541+
{{node, Subject}, W}
542+
end, ns_server_cert:expiration_warnings(Props))
543+
end,
544+
545+
lists:foreach(
546+
fun ({{ca, Id, Subj}, expired}) ->
547+
Error = fmt_to_bin(errors(ca_expired), [Id, Subj]),
548+
global_alert({cert_expired, {ca, Id}}, Error);
549+
({{node, Subj}, expired}) ->
550+
Host = misc:extract_node_address(node()),
551+
Error = fmt_to_bin(errors(node_cert_expired), [Host, Subj]),
552+
global_alert({cert_expired, {node, Host}}, Error);
553+
({{ca, Id, Subj}, {expires_soon, UTCSeconds}}) ->
554+
Date = menelaus_web_cert:format_time(UTCSeconds),
555+
Error = fmt_to_bin(errors(ca_expires_soon), [Id, Subj, Date]),
556+
global_alert({cert_expires_soon, {ca, Id}}, Error);
557+
({{node, Subj}, {expires_soon, UTCSeconds}}) ->
558+
Host = misc:extract_node_address(node()),
559+
Date = menelaus_web_cert:format_time(UTCSeconds),
560+
Error = fmt_to_bin(errors(node_cert_expires_soon),
561+
[Host, Subj, Date]),
562+
global_alert({cert_expires_soon, {node, Host}}, Error)
563+
end, CAAlerts ++ LocalAlerts),
564+
488565
Opaque.
489566

490567
alert_if_time_out_of_sync({time_offset_status, true}) ->
@@ -683,12 +760,11 @@ add_proplist_list_elem(ListKey, Elem, PList) ->
683760
List = misc:expect_prop_value(ListKey, PList),
684761
misc:update_proplist(PList, [{ListKey, lists:usort([Elem | List])}]).
685762

686-
config_email_alerts_upgrade_to_70(EmailAlerts) ->
763+
upgrade_alerts(EmailAlerts, Mutations) ->
687764
Result =
688-
functools:chain(
689-
EmailAlerts,
690-
[add_proplist_list_elem(alerts, time_out_of_sync, _),
691-
add_proplist_kv(pop_up_alerts, alert_keys(), _)]),
765+
lists:foldl(
766+
fun (Mutation, Acc) -> Mutation(Acc) end,
767+
EmailAlerts, Mutations),
692768

693769
case misc:sort_kv_list(Result) =:= misc:sort_kv_list(EmailAlerts) of
694770
true ->

src/menelaus_web_cert.erl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,8 @@
2424
handle_get_node_certificate/2,
2525
handle_get_node_certificates/1,
2626
handle_client_cert_auth_settings/1,
27-
handle_client_cert_auth_settings_post/1]).
27+
handle_client_cert_auth_settings_post/1,
28+
format_time/1]).
2829

2930
-define(MAX_CLIENT_CERT_PREFIXES, ?get_param(max_prefixes, 10)).
3031

src/ns_online_config_upgrader.erl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,4 +103,5 @@ upgrade(?VERSION_70, Config) ->
103103

104104
upgrade(?VERSION_71, Config) ->
105105
{?VERSION_MORPHEUS,
106-
menelaus_web_auto_failover:config_upgrade_to_MORPHEUS(Config)}.
106+
menelaus_web_auto_failover:config_upgrade_to_MORPHEUS(Config) ++
107+
menelaus_web_alerts_srv:config_upgrade_to_MORPHEUS(Config)}.

src/ns_server_cert.erl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,8 @@
4040
trusted_CAs_pre_71/1,
4141
generate_node_certs/1,
4242
filter_nodes_by_ca/2,
43-
inbox_chain_path/0]).
43+
inbox_chain_path/0,
44+
expiration_warnings/1]).
4445

4546
inbox_ca_path() ->
4647
filename:join(path_config:component_path(data, "inbox"), "CA").

0 commit comments

Comments
 (0)