@@ -121,7 +121,15 @@ errors(node_cert_expired) ->
121121errors (ca_expires_soon ) ->
122122 " Trusted CA certificate with ID=~b (subject: '~s ') will expire at ~s ." ;
123123errors (ca_expired ) ->
124- " Trusted CA certificate with ID=~b (subject: '~s ') has expired." .
124+ " Trusted CA certificate with ID=~b (subject: '~s ') has expired." ;
125+ errors (client_xdcr_cert_expires_soon ) ->
126+ " Client XDCR certificate (subject: '~s ') will expire at ~s ." ;
127+ errors (client_xdcr_cert_expired ) ->
128+ " Client XDCR certificate (subject: '~s ') has expired." ;
129+ errors (xdcr_ca_expires_soon ) ->
130+ " XDCR CA certificate (subject: '~s ') will expire at ~s ." ;
131+ errors (xdcr_ca_expired ) ->
132+ " XDCR CA certificate (subject: '~s ') has expired." .
125133
126134% % ------------------------------------------------------------------
127135% % API Function Definitions
@@ -311,7 +319,7 @@ start_timer() ->
%% @doc Returns the identifiers of the checks in this list, in order.
%% The `certs' and `xdcr_certs' entries correspond to the matching
%% check/4 clauses.
%% NOTE(review): how this list is consumed (and what distinguishes a
%% "global" check from any other) is not visible here - confirm at the
%% call site.
global_checks() ->
    [oom,
     ip,
     write_fail,
     overhead,
     disk,
     audit_write_fail,
     indexer_ram_max_usage,
     cas_drift_threshold,
     communication_issue,
     time_out_of_sync,
     disk_usage_analyzer_stuck,
     certs,
     xdcr_certs].
315323
316324% % @doc fires off various checks
317325check_alerts (Opaque , Hist , Stats ) ->
@@ -522,7 +530,8 @@ check(certs, Opaque, _History, _Stats) ->
522530 true ->
523531 lists :flatmap (
524532 fun (CAProps ) ->
525- ExpWarnings = ns_server_cert :expiration_warnings (CAProps ),
533+ {_ , ExpWarnings } =
534+ ns_server_cert :expiration_warnings (CAProps ),
526535 Subject = proplists :get_value (subject , CAProps ),
527536 Id = proplists :get_value (id , CAProps ),
528537 [{{ca , Id , Subject }, W } || W <- ExpWarnings ]
@@ -535,11 +544,12 @@ check(certs, Opaque, _History, _Stats) ->
535544 case ns_config :read_key_fast ({node , node (), node_cert }, undefined ) of
536545 undefined -> [];
537546 Props ->
547+ {_ , ExpWarnings } = ns_server_cert :expiration_warnings (Props ),
538548 lists :map (
539549 fun (W ) ->
540550 Subject = proplists :get_value (subject , Props ),
541551 {{node , Subject }, W }
542- end , ns_server_cert : expiration_warnings ( Props ) )
552+ end , ExpWarnings )
543553 end ,
544554
545555 lists :foreach (
@@ -562,7 +572,94 @@ check(certs, Opaque, _History, _Stats) ->
562572 global_alert ({cert_expires_soon , {node , Host }}, Error )
563573 end , CAAlerts ++ LocalAlerts ),
564574
565- Opaque .
575+ Opaque ;
576+
%% XDCR certificate check: only the node that mb_master reports as the
%% master runs check_xdcr_certs/1; every other node returns Opaque
%% unchanged.
check(xdcr_certs, Opaque, _History, _Stats) ->
    case mb_master:master_node() of
        Master when Master == node() ->
            check_xdcr_certs(Opaque);
        _NotMaster ->
            Opaque
    end.
582+
%% @doc Asks goxdcr_rest:get_certificates/0 for certificate info and
%% raises a global alert for every expired / expiring certificate alert
%% derived from it. Returns the (possibly updated) Opaque.
%%
%% Only the get_certificates/0 call itself is covered by the `catch':
%% if it raises, the failure is logged and Opaque is returned untouched.
%% Anything that fails while processing the fetched info propagates.
check_xdcr_certs(Opaque) ->
    try goxdcr_rest:get_certificates() of
        CertsInfo ->
            {UpdatedOpaque, CertAlerts} =
                cached_xdcr_cert_alerts(CertsInfo, Opaque),
            lists:foreach(fun raise_xdcr_cert_alert/1, CertAlerts),
            UpdatedOpaque
    catch
        Class:Reason:Stacktrace ->
            ?log_error(" Failed to extract certs info from xdcr: ~p :~p~n~p ",
                       [Class, Reason, Stacktrace]),
            Opaque
    end.

%% Returns {Opaque1, Alerts} for the given certificate info.
%%
%% The previously computed alerts, stored in Opaque under the
%% xdcr_certs_check key, are reused as long as the hash of
%% {CertsInfo, WarningDays} is unchanged and the current UTC time (in
%% gregorian seconds) is still before the stored retry_time. Otherwise
%% the alerts are recalculated and the stored entry is replaced.
cached_xdcr_cert_alerts(CertsInfo, Opaque) ->
    WarningDays =
        ns_config:read_key_fast({cert, expiration_warning_days}, undefined),
    Fingerprint = erlang:phash2({CertsInfo, WarningDays}),
    Now = calendar:datetime_to_gregorian_seconds(calendar:universal_time()),
    case dict:find(xdcr_certs_check, Opaque) of
        {ok, #{hash := Fingerprint,
               retry_time := CachedRetryTime,
               result := CachedAlerts}} when Now < CachedRetryTime ->
            {Opaque, CachedAlerts};
        _ ->
            {RetryTime, FreshAlerts} = calculate_xdcr_cert_alerts(CertsInfo),
            Entry = #{hash => Fingerprint,
                      retry_time => RetryTime,
                      result => FreshAlerts},
            {dict:store(xdcr_certs_check, Entry, Opaque), FreshAlerts}
    end.

%% Raises the global alert that corresponds to a single
%% {{Type, Subject}, Warning} entry, where Type is `ca' or `client_cert'
%% and Warning is `expired' or {expires_soon, UTCSeconds}.
raise_xdcr_cert_alert({{ca, Subject}, expired}) ->
    Msg = fmt_to_bin(errors(xdcr_ca_expired), [Subject]),
    global_alert({cert_expired, {xdcr, Subject}}, Msg);
raise_xdcr_cert_alert({{client_cert, Subject}, expired}) ->
    Msg = fmt_to_bin(errors(client_xdcr_cert_expired), [Subject]),
    global_alert({cert_expired, {client_xdcr, Subject}}, Msg);
raise_xdcr_cert_alert({{ca, Subject}, {expires_soon, UTCSeconds}}) ->
    ExpiresAt = menelaus_web_cert:format_time(UTCSeconds),
    Msg = fmt_to_bin(errors(xdcr_ca_expires_soon), [Subject, ExpiresAt]),
    global_alert({cert_expires_soon, {xdcr, Subject}}, Msg);
raise_xdcr_cert_alert({{client_cert, Subject}, {expires_soon, UTCSeconds}}) ->
    ExpiresAt = menelaus_web_cert:format_time(UTCSeconds),
    Msg = fmt_to_bin(errors(client_xdcr_cert_expires_soon),
                     [Subject, ExpiresAt]),
    global_alert({cert_expires_soon, {client_xdcr, Subject}}, Msg).
636+
%% @doc Computes expiration alerts for the trusted and client
%% certificates found in the given map.
%%
%% Returns {RecheckTime, Alerts}. Alerts is a list of
%% {{Type, Subject}, Warning} with Type being `ca' for trusted_certs
%% entries and `client_cert' for client_certs entries. RecheckTime is
%% the smallest recheck time returned by
%% ns_server_cert:expiration_warnings/1 over all decodable certificates,
%% or `infinity' when there is none.
calculate_xdcr_cert_alerts(#{trusted_certs := TrustedCerts,
                             client_certs := ClientCerts}) ->
    {CAAlerts, CARecheckTime} = xdcr_cert_alerts_for(ca, TrustedCerts),
    {ClientAlerts, ClientRecheckTime} =
        xdcr_cert_alerts_for(client_cert, ClientCerts),
    {min(CARecheckTime, ClientRecheckTime), CAAlerts ++ ClientAlerts}.

%% Folds over Certs and returns {Alerts, RecheckTime} for one
%% certificate type.
xdcr_cert_alerts_for(Type, Certs) ->
    lists:foldl(
      fun (Cert, Acc) -> add_xdcr_cert_alerts(Type, Cert, Acc) end,
      {[], infinity}, Certs).

%% Adds the alerts of a single certificate to the accumulator.
%% A certificate that cannot be decoded is logged and skipped.
add_xdcr_cert_alerts(Type, Cert, {AlertsAcc, RecheckAcc} = Acc) ->
    case ns_server_cert:decode_single_certificate(Cert) of
        {error, Reason} ->
            ?log_debug(" Failed to decode xdcr cert: ~p ", [Reason]),
            Acc;
        {ok, Decoded} ->
            Props = ns_server_cert:cert_props(Decoded),
            {RecheckTime, Warnings} =
                ns_server_cert:expiration_warnings(Props),
            Subject = proplists:get_value(subject, Props),
            CertAlerts = [{{Type, Subject}, W} || W <- Warnings],
            %% Any number sorts before the atom `infinity' in Erlang's
            %% term order, so min/2 keeps the earliest numeric time.
            %% NOTE(review): assumes recheck times are numbers or
            %% `infinity' - confirm against ns_server_cert.
            {CertAlerts ++ AlertsAcc, min(RecheckTime, RecheckAcc)}
    end.
566663
567664alert_if_time_out_of_sync ({time_offset_status , true }) ->
568665 Err = fmt_to_bin (errors (time_out_of_sync ), [node ()]),
0 commit comments