Skip to content

Commit acb26a5

Browse files
MB-50306: Add cert expiration alerts for xdcr certs
Change-Id: Ia5757c34d6681383d992baff93e625867547b472 Reviewed-on: https://review.couchbase.org/c/ns_server/+/169188 Well-Formed: Build Bot <[email protected]> Reviewed-by: Steve Watanabe <[email protected]> Tested-by: Build Bot <[email protected]> Tested-by: Timofey Barmin <[email protected]>
1 parent b300319 commit acb26a5

File tree

3 files changed

+134
-13
lines changed

3 files changed

+134
-13
lines changed

src/goxdcr_rest.erl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
find_all_replication_docs/0,
1919
all_local_replication_infos/0,
2020
stats/1,
21-
get_replications_with_remote_info/0]).
21+
get_replications_with_remote_info/0,
22+
get_certificates/0]).
2223

2324
convert_header_name(Header) when is_atom(Header) ->
2425
atom_to_list(Header);
@@ -234,3 +235,20 @@ get_replications_with_remote_info() ->
234235
ClusterName = proplists:get_value(RemoteClusterUUID, RemoteClusters, <<"unknown">>),
235236
[{Id, BucketName, binary_to_list(ClusterName), RemoteBucket} | Acc]
236237
end, [], find_all_replication_docs()).
238+
239+
%% @doc Asks goxdcr for "/pools/default/remoteClusters" and collects the
%% certificates found in the reply.
%%
%% The reply is treated as a list of `{Props}' entries, one per remote
%% cluster. For each entry the `<<"certificate">>' and
%% `<<"clientCertificate">>' values are split into individual certificates
%% with ns_server_cert:split_certs/1; an entry lacking a field contributes
%% nothing for that field.
%%
%% The conversion fun handed to get_from_goxdcr/3 produces
%% `#{trusted_certs => [...], client_certs => [...]}'.
get_certificates() ->
    CertsOfCluster =
        fun (Field, Props) ->
                case proplists:get_value(Field, Props) of
                    undefined -> [];
                    Value -> ns_server_cert:split_certs(Value)
                end
        end,
    CollectCerts =
        fun (Field, Clusters) ->
                lists:flatmap(
                  fun ({Props}) -> CertsOfCluster(Field, Props) end,
                  Clusters)
        end,
    ToCertsMap =
        fun (Clusters) ->
                Trusted = CollectCerts(<<"certificate">>, Clusters),
                Client = CollectCerts(<<"clientCertificate">>, Clusters),
                #{trusted_certs => Trusted, client_certs => Client}
        end,
    get_from_goxdcr(ToCertsMap, "/pools/default/remoteClusters", 30000).

src/menelaus_web_alerts_srv.erl

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,15 @@ errors(node_cert_expired) ->
121121
errors(ca_expires_soon) ->
122122
"Trusted CA certificate with ID=~b (subject: '~s') will expire at ~s.";
123123
errors(ca_expired) ->
124-
"Trusted CA certificate with ID=~b (subject: '~s') has expired.".
124+
"Trusted CA certificate with ID=~b (subject: '~s') has expired.";
125+
errors(client_xdcr_cert_expires_soon) ->
126+
"Client XDCR certificate (subject: '~s') will expire at ~s.";
127+
errors(client_xdcr_cert_expired) ->
128+
"Client XDCR certificate (subject: '~s') has expired.";
129+
errors(xdcr_ca_expires_soon) ->
130+
"XDCR CA certificate (subject: '~s') will expire at ~s.";
131+
errors(xdcr_ca_expired) ->
132+
"XDCR CA certificate (subject: '~s') has expired.".
125133

126134
%% ------------------------------------------------------------------
127135
%% API Function Definitions
@@ -311,7 +319,7 @@ start_timer() ->
311319
%% @doc Names of the global checks. The `certs' and `xdcr_certs' entries
%% correspond to check/4 clauses of the same name; the remaining names
%% presumably do as well (TODO confirm against the rest of check/4).
global_checks() ->
    [oom,
     ip,
     write_fail,
     overhead,
     disk,
     audit_write_fail,
     indexer_ram_max_usage,
     cas_drift_threshold,
     communication_issue,
     time_out_of_sync,
     disk_usage_analyzer_stuck,
     certs,
     xdcr_certs].
315323

316324
%% @doc fires off various checks
317325
check_alerts(Opaque, Hist, Stats) ->
@@ -522,7 +530,8 @@ check(certs, Opaque, _History, _Stats) ->
522530
true ->
523531
lists:flatmap(
524532
fun (CAProps) ->
525-
ExpWarnings = ns_server_cert:expiration_warnings(CAProps),
533+
{_, ExpWarnings} =
534+
ns_server_cert:expiration_warnings(CAProps),
526535
Subject = proplists:get_value(subject, CAProps),
527536
Id = proplists:get_value(id, CAProps),
528537
[{{ca, Id, Subject}, W} || W <- ExpWarnings]
@@ -535,11 +544,12 @@ check(certs, Opaque, _History, _Stats) ->
535544
case ns_config:read_key_fast({node, node(), node_cert}, undefined) of
536545
undefined -> [];
537546
Props ->
547+
{_, ExpWarnings} = ns_server_cert:expiration_warnings(Props),
538548
lists:map(
539549
fun (W) ->
540550
Subject = proplists:get_value(subject, Props),
541551
{{node, Subject}, W}
542-
end, ns_server_cert:expiration_warnings(Props))
552+
end, ExpWarnings)
543553
end,
544554

545555
lists:foreach(
@@ -562,7 +572,94 @@ check(certs, Opaque, _History, _Stats) ->
562572
global_alert({cert_expires_soon, {node, Host}}, Error)
563573
end, CAAlerts ++ LocalAlerts),
564574

565-
Opaque.
575+
Opaque;
576+
577+
%% XDCR certificates are examined only on the node that
%% mb_master:master_node/0 reports; on every other node the opaque state is
%% returned unchanged.
check(xdcr_certs, Opaque, _History, _Stats) ->
    ThisNodeIsMaster = mb_master:master_node() == node(),
    case ThisNodeIsMaster of
        false -> Opaque;
        true -> check_xdcr_certs(Opaque)
    end.
582+
583+
%% @doc Raises expiration alerts for the certificates reported by
%% goxdcr_rest:get_certificates/0 and returns the (possibly updated) Opaque
%% dict.
%%
%% Only the call to goxdcr_rest:get_certificates/0 is protected by the
%% `catch': if it raises, the failure is logged and Opaque is returned
%% unchanged. Anything raised while computing or raising alerts propagates
%% to the caller.
check_xdcr_certs(Opaque) ->
    try goxdcr_rest:get_certificates() of
        CertsInfo ->
            {UpdatedOpaque, Alerts} = xdcr_cert_alerts(CertsInfo, Opaque),
            lists:foreach(fun raise_xdcr_cert_alert/1, Alerts),
            UpdatedOpaque
    catch
        Class:Reason:Stack ->
            ?log_error("Failed to extract certs info from xdcr: ~p:~p~n~p",
                       [Class, Reason, Stack]),
            Opaque
    end.

%% Returns {Opaque1, Alerts} for the given certificates.
%%
%% The previous result is kept in Opaque under the `xdcr_certs_check' key
%% and is reused as long as the hash of {CertsInfo, WarningDays} is the same
%% and the stored retry time is still in the future. Otherwise the alerts
%% are recalculated and the stored entry is replaced.
xdcr_cert_alerts(CertsInfo, Opaque) ->
    WarningDays = ns_config:read_key_fast({cert, expiration_warning_days},
                                          undefined),
    Hash = erlang:phash2({CertsInfo, WarningDays}),
    Now = calendar:datetime_to_gregorian_seconds(calendar:universal_time()),
    case dict:find(xdcr_certs_check, Opaque) of
        %% Hash is already bound, so this clause only matches an entry that
        %% was computed from the same input.
        {ok, #{hash := Hash,
               retry_time := CachedRetryTime,
               result := CachedAlerts}} when Now < CachedRetryTime ->
            {Opaque, CachedAlerts};
        _ ->
            {RetryTime, Alerts} = calculate_xdcr_cert_alerts(CertsInfo),
            Entry = #{hash => Hash,
                      retry_time => RetryTime,
                      result => Alerts},
            {dict:store(xdcr_certs_check, Entry, Opaque), Alerts}
    end.

%% Turns a single {{Type, Subject}, Warning} item into a global alert.
raise_xdcr_cert_alert({{ca, Subj}, expired}) ->
    Msg = fmt_to_bin(errors(xdcr_ca_expired), [Subj]),
    global_alert({cert_expired, {xdcr, Subj}}, Msg);
raise_xdcr_cert_alert({{client_cert, Subj}, expired}) ->
    Msg = fmt_to_bin(errors(client_xdcr_cert_expired), [Subj]),
    global_alert({cert_expired, {client_xdcr, Subj}}, Msg);
raise_xdcr_cert_alert({{ca, Subj}, {expires_soon, UTCSeconds}}) ->
    ExpiresAt = menelaus_web_cert:format_time(UTCSeconds),
    Msg = fmt_to_bin(errors(xdcr_ca_expires_soon), [Subj, ExpiresAt]),
    global_alert({cert_expires_soon, {xdcr, Subj}}, Msg);
raise_xdcr_cert_alert({{client_cert, Subj}, {expires_soon, UTCSeconds}}) ->
    ExpiresAt = menelaus_web_cert:format_time(UTCSeconds),
    Msg = fmt_to_bin(errors(client_xdcr_cert_expires_soon),
                     [Subj, ExpiresAt]),
    global_alert({cert_expires_soon, {client_xdcr, Subj}}, Msg).
636+
637+
%% @doc Computes expiration warnings for the XDCR trusted and client
%% certificates.
%%
%% Returns {RecheckTime, Alerts}. Alerts is a list of
%% `{{ca | client_cert, Subject}, Warning}' items, trusted certificates
%% first. RecheckTime is the smallest recheck time reported by
%% ns_server_cert:expiration_warnings/1 for any decodable certificate, or
%% `infinity' when none reported a number.
calculate_xdcr_cert_alerts(#{trusted_certs := TrustedCerts,
                             client_certs := ClientCerts}) ->
    {CAWarnings, CARecheck} = xdcr_cert_warnings(ca, TrustedCerts),
    {ClientWarnings, ClientRecheck} =
        xdcr_cert_warnings(client_cert, ClientCerts),
    {min(CARecheck, ClientRecheck), CAWarnings ++ ClientWarnings}.

%% Folds over Certs, accumulating {Warnings, RecheckTime} for one
%% certificate Type.
xdcr_cert_warnings(Type, Certs) ->
    lists:foldl(
      fun (Cert, Acc) -> add_xdcr_cert_warnings(Type, Cert, Acc) end,
      {[], infinity}, Certs).

%% Adds the warnings of a single certificate to the accumulator. A
%% certificate that cannot be decoded is logged and otherwise ignored.
add_xdcr_cert_warnings(Type, Cert, {WarningsAcc, RecheckAcc} = Acc) ->
    case ns_server_cert:decode_single_certificate(Cert) of
        {error, Reason} ->
            ?log_debug("Failed to decode xdcr cert: ~p",
                       [Reason]),
            Acc;
        {ok, Decoded} ->
            Props = ns_server_cert:cert_props(Decoded),
            {CertRecheck, CertWarnings} =
                ns_server_cert:expiration_warnings(Props),
            Subject = proplists:get_value(subject, Props),
            Tagged = [{{Type, Subject}, W} || W <- CertWarnings],
            %% Numbers sort before atoms in Erlang term order, so any
            %% numeric recheck time wins over `infinity' in min/2.
            {Tagged ++ WarningsAcc, min(CertRecheck, RecheckAcc)}
    end.
566663

567664
alert_if_time_out_of_sync({time_offset_status, true}) ->
568665
Err = fmt_to_bin(errors(time_out_of_sync), [node()]),

src/ns_server_cert.erl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@
4141
generate_node_certs/1,
4242
filter_nodes_by_ca/2,
4343
inbox_chain_path/0,
44-
expiration_warnings/1]).
44+
expiration_warnings/1,
45+
split_certs/1,
46+
cert_props/1]).
4547

4648
inbox_ca_path() ->
4749
filename:join(path_config:component_path(data, "inbox"), "CA").
@@ -1129,6 +1131,9 @@ read_ca_file(Path) ->
11291131
{error, {read, Reason}}
11301132
end.
11311133

1134+
%% @doc Shorthand for cert_props/3 with `undefined' as the type and no
%% extra properties.
cert_props(Der) ->
    cert_props(undefined, Der, []).
1136+
11321137
cert_props(Type, DerCert, Extras) when is_binary(DerCert) ->
11331138
{Sub, NotBefore, NotAfter} = get_der_info(DerCert),
11341139
[{subject, iolist_to_binary(Sub)},
@@ -1174,7 +1179,7 @@ get_warnings() ->
11741179
generated -> [self_signed];
11751180
_ -> []
11761181
end,
1177-
ExpWarnings = expiration_warnings(CAProps),
1182+
{_, ExpWarnings} = expiration_warnings(CAProps),
11781183
Id = proplists:get_value(id, CAProps),
11791184
UnusedWarnings =
11801185
case proplists:get_value(type, CAProps) of
@@ -1194,17 +1199,18 @@ get_warnings() ->
11941199
%% @doc Computes expiration warnings for the certificate described by the
%% CertProps proplist.
%%
%% The expiration time is read from `not_after', falling back to `expires'
%% (pre-7.1 props). It is compared against the current UTC time expressed
%% in gregorian seconds, so it is presumably expected in the same unit
%% (TODO confirm against cert_props/3).
%%
%% Returns {RecheckTime, Warnings}:
%%   {infinity, [expired]}                  - the certificate has expired;
%%   {NotAfter, [{expires_soon, NotAfter}]} - it expires within the warning
%%                                            period configured under
%%                                            {cert, expiration_warning_days}
%%                                            (7 days by default);
%%   {NotAfter - WarningSeconds, []}        - it is not yet in the warning
%%                                            period; RecheckTime is the
%%                                            moment it enters it;
%%   {infinity, []}                         - no integer expiration time is
%%                                            present in CertProps.
expiration_warnings(CertProps) ->
    Now = calendar:datetime_to_gregorian_seconds(calendar:universal_time()),
    WarningDays = ns_config:read_key_fast({cert, expiration_warning_days}, 7),
    WarningSeconds = WarningDays * 24 * 60 * 60,
    WarningThreshold = Now + WarningSeconds,

    Expire = proplists:get_value(expires, CertProps), %% For pre-7.1 only
    NotAfter = proplists:get_value(not_after, CertProps, Expire),
    case NotAfter of
        A when is_integer(A), A =< Now ->
            {infinity, [expired]};
        A when is_integer(A), A =< WarningThreshold ->
            {A, [{expires_soon, A}]};
        A when is_integer(A) ->
            {A - WarningSeconds, []};
        _ ->
            %% Without an integer expiration time (e.g. `undefined' when
            %% neither key is present) the subtraction above would raise
            %% badarith. Report no warnings, as before the recheck time was
            %% introduced, and no recheck time either.
            {infinity, []}
    end.
12091215

12101216
is_trusted(CAPem, TrustedCAs) ->
@@ -1237,7 +1243,7 @@ node_cert_warnings(TrustedCAs, NodeCertProps) ->
12371243
end
12381244
end,
12391245

1240-
ExpirationWarnings = expiration_warnings(NodeCertProps),
1246+
{_, ExpirationWarnings} = expiration_warnings(NodeCertProps),
12411247

12421248
SelfSignedWarnings =
12431249
case proplists:get_value(type, NodeCertProps) of

0 commit comments

Comments
 (0)