Skip to content

Commit b189c2f

Browse files
authored
TD-788: Adds woody pool/connections metrics (#107)
* Adds overrides for linter deps
* Bumps woody w/ prometheus collectors
* Bumps woody & scoper
* Raises processing info log messages
* Adds woody event severity mapping example to `sys.config`
1 parent df89aad commit b189c2f

File tree

11 files changed

+74
-30
lines changed

11 files changed

+74
-30
lines changed

apps/hellgate/src/hellgate.app.src

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
payproc_errors,
2222
erl_health,
2323
limiter_proto,
24+
prometheus,
25+
prometheus_cowboy,
2426
opentelemetry_api,
2527
opentelemetry_exporter,
2628
opentelemetry

apps/hellgate/src/hellgate.erl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,15 @@ get_prometheus_route() ->
104104

105105
-spec start(normal, any()) -> {ok, pid()} | {error, any()}.
106106
start(_StartType, _StartArgs) ->
107+
ok = setup_metrics(),
107108
supervisor:start_link(?MODULE, []).
108109

109110
-spec stop(any()) -> ok.
110111
stop(_State) ->
111112
ok.
113+
114+
%%
115+
116+
setup_metrics() ->
117+
ok = woody_ranch_prometheus_collector:setup(),
118+
ok = woody_hackney_prometheus_collector:setup().

apps/hellgate/src/hg_invoice_payment.erl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -686,8 +686,8 @@ validate_recurrent_terms(RecurrentTerms, PaymentTool) ->
686686
_ =
687687
case hg_payment_tool:has_any_payment_method(PaymentTool, PMs) of
688688
false ->
689-
logger:info("PaymentTool: ~p", [PaymentTool]),
690-
logger:info("RecurrentPaymentMethods: ~p", [PMs]),
689+
logger:notice("PaymentTool: ~p", [PaymentTool]),
690+
logger:notice("RecurrentPaymentMethods: ~p", [PMs]),
691691
throw_invalid_request(<<"Invalid payment method">>);
692692
true ->
693693
ok
@@ -794,7 +794,7 @@ log_route_choice_meta(#{choice_meta := undefined}, _Revision) ->
794794
ok;
795795
log_route_choice_meta(#{choice_meta := ChoiceMeta}, Revision) ->
796796
Metadata = hg_routing:get_logger_metadata(ChoiceMeta, Revision),
797-
logger:log(info, "Routing decision made", #{routing => Metadata}).
797+
logger:log(notice, "Routing decision made", #{routing => Metadata}).
798798

799799
maybe_log_misconfigurations({misconfiguration, _} = Error) ->
800800
{Format, Details} = hg_routing:prepare_log_message(Error),
@@ -810,14 +810,14 @@ log_rejected_routes(all, Routes, VS) ->
810810
log_rejected_routes(limit_misconfiguration, Routes, _VS) ->
811811
?LOG_MD(warning, "Limiter hold error caused route candidates to be rejected: ~p", [Routes]);
812812
log_rejected_routes(limit_overflow, Routes, _VS) ->
813-
?LOG_MD(info, "Limit overflow caused route candidates to be rejected: ~p", [Routes]);
813+
?LOG_MD(notice, "Limit overflow caused route candidates to be rejected: ~p", [Routes]);
814814
log_rejected_routes(adapter_unavailable, Routes, _VS) ->
815-
?LOG_MD(info, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]);
815+
?LOG_MD(notice, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]);
816816
log_rejected_routes(provider_conversion_is_too_low, Routes, _VS) ->
817-
?LOG_MD(info, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]);
817+
?LOG_MD(notice, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]);
818818
log_rejected_routes(forbidden, Routes, VS) ->
819-
?LOG_MD(info, "Rejected routes found for varset: ~p", [VS]),
820-
?LOG_MD(info, "Rejected routes found, rejected routes: ~p", [Routes]);
819+
?LOG_MD(notice, "Rejected routes found for varset: ~p", [VS]),
820+
?LOG_MD(notice, "Rejected routes found, rejected routes: ~p", [Routes]);
821821
log_rejected_routes(_, _Routes, _VS) ->
822822
ok.
823823

@@ -1932,7 +1932,7 @@ process_risk_score(Action, St) ->
19321932
ok ->
19331933
{next, {Events, hg_machine_action:set_timeout(0, Action)}};
19341934
{error, risk_score_is_too_high = Reason} ->
1935-
logger:info("No route found, reason = ~p, varset: ~p", [Reason, VS1]),
1935+
logger:notice("No route found, reason = ~p, varset: ~p", [Reason, VS1]),
19361936
handle_choose_route_error(Reason, Events, St, Action)
19371937
end.
19381938

@@ -2030,15 +2030,15 @@ handle_choose_route_error(Error, Events, St, Action) ->
20302030

20312031
%% NOTE See damsel payproc errors (proto/payment_processing_errors.thrift) for no route found
20322032

2033-
construct_routing_failure({rejected_routes, {forbidden, RejectedRoutes}}) ->
2034-
construct_routing_failure([forbidden], genlib:format(RejectedRoutes));
20352033
construct_routing_failure({rejected_routes, {SubCode, RejectedRoutes}}) when
20362034
SubCode =:= limit_misconfiguration orelse
20372035
SubCode =:= limit_overflow orelse
20382036
SubCode =:= adapter_unavailable orelse
20392037
SubCode =:= provider_conversion_is_too_low
20402038
->
20412039
construct_routing_failure([rejected, SubCode], genlib:format(RejectedRoutes));
2040+
construct_routing_failure({rejected_routes, {_SubCode, RejectedRoutes}}) ->
2041+
construct_routing_failure([forbidden], genlib:format(RejectedRoutes));
20422042
construct_routing_failure({misconfiguration = Code, Details}) ->
20432043
construct_routing_failure([unknown, {unknown_error, atom_to_binary(Code)}], genlib:format(Details));
20442044
construct_routing_failure(Code = risk_score_is_too_high) ->
@@ -2336,7 +2336,7 @@ process_failure({payment, Step} = Activity, Events, Action, Failure, St, _Refund
23362336
Target = get_target(St),
23372337
case check_retry_possibility(Target, Failure, St) of
23382338
{retry, Timeout} ->
2339-
_ = logger:info("Retry session after transient failure, wait ~p", [Timeout]),
2339+
_ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]),
23402340
{SessionEvents, SessionAction} = retry_session(Action, Target, Timeout),
23412341
{next, {Events ++ SessionEvents, SessionAction}};
23422342
fatal ->
@@ -3273,7 +3273,7 @@ log_cascade_attempt_context(
32733273
#domain_PaymentsServiceTerms{attempt_limit = AttemptLimit},
32743274
#st{routes = AttemptedRoutes}
32753275
) ->
3276-
?LOG_MD(info, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [
3276+
?LOG_MD(notice, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [
32773277
AttemptLimit, AttemptedRoutes
32783278
]).
32793279

apps/hellgate/src/hg_invoice_payment_refund.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ finish_session_processing({Events0, Action}, Session, Refund) ->
313313
{finished, ?session_failed(Failure)} ->
314314
case check_retry_possibility(Failure, Refund) of
315315
{retry, Timeout} ->
316-
_ = logger:info("Retry session after transient failure, wait ~p", [Timeout]),
316+
_ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]),
317317
{SessionEvents, SessionAction} = retry_session(Action, Timeout),
318318
{next, {Events1 ++ SessionEvents, SessionAction}};
319319
fatal ->

apps/hellgate/src/hg_limiter.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ check_limits_([T | TurnoverLimits], Context, Acc) ->
8686
true ->
8787
check_limits_(TurnoverLimits, Context, [Limit | Acc]);
8888
false ->
89-
logger:info("Limit with id ~p overflowed, amount ~p upper boundary ~p", [
89+
logger:notice("Limit with id ~p overflowed, amount ~p upper boundary ~p", [
9090
LimitID,
9191
LimiterAmount,
9292
UpperBoundary

apps/hellgate/src/hg_machine.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ dispatch_repair(Ns, Payload, Machine) ->
346346
marshal_repair_result(ok, Result, Machine)
347347
catch
348348
throw:{exception, Reason} = Error ->
349-
logger:info("Process repair failed, ~p", [Reason]),
349+
logger:notice("Process repair failed, ~p", [Reason]),
350350
woody_error:raise(business, marshal_repair_failed(Error))
351351
end.
352352

apps/hellgate/src/hg_recurrent_paytool.erl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ init(EncodedParams, #{id := RecPaymentToolID}) ->
253253
{ChosenRoute, ChoiceContext} = hg_routing:choose_route(NonFailRatedRoutes),
254254
ChosenPaymentRoute = hg_route:to_payment_route(ChosenRoute),
255255
LoggerMetadata = hg_routing:get_logger_metadata(ChoiceContext, Revision),
256-
_ = logger:log(info, "Routing decision made", #{routing => LoggerMetadata}),
256+
_ = logger:log(notice, "Routing decision made", #{routing => LoggerMetadata}),
257257
RecPaymentTool2 = set_minimal_payment_cost(RecPaymentTool, ChosenPaymentRoute, VS, Revision),
258258
{ok, {Changes, Action}} = start_session(),
259259
StartChanges = [
@@ -376,7 +376,7 @@ validate_risk_score(RiskScore) when RiskScore == low; RiskScore == high ->
376376
RiskScore.
377377

378378
handle_route_error(risk_score_is_too_high = Reason, RecPaymentTool) ->
379-
_ = logger:log(info, "No route found, reason = ~p", [Reason], logger:get_process_metadata()),
379+
_ = logger:log(notice, "No route found, reason = ~p", [Reason], logger:get_process_metadata()),
380380
{misconfiguration, {'No route found for a recurrent payment tool', RecPaymentTool}}.
381381
handle_route_error({no_route_found, {Reason, RejectedRoutes}}, RecPaymentTool, Varset) ->
382382
LogFun = fun(Msg, Param) ->

apps/hellgate/src/hg_routing.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ get_provider_status(Route, FDStats) ->
483483
AvailabilityStatus = get_adapter_availability_status(FdOverrides, AvailabilityServiceID, FDStats),
484484
ConversionStatus = get_provider_conversion_status(FdOverrides, ConversionServiceID, FDStats),
485485
{AvailabilityStatus, ConversionStatus}.
486+
486487
get_adapter_availability_status(#domain_RouteFaultDetectorOverrides{enabled = true}, _FDID, _Stats) ->
487488
%% ignore fd statistic if set override
488489
{alive, 0.0};

config/sys.config

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@
4646
event_handler_opts => #{
4747
formatter_opts => #{
4848
max_length => 1000
49+
},
50+
events_severity => #{
51+
%% These events can be de-emphasized by lowering their severity level
52+
['call service'] => info,
53+
['service result'] => info,
54+
['invoke service handler'] => info,
55+
['service handler result'] => info,
56+
['service handler result', error, business] => warning,
57+
['client cache hit'] => info
4958
}
5059
}
5160
}},
@@ -126,6 +135,10 @@
126135
event_handler_opts => #{
127136
formatter_opts => #{
128137
max_length => 1000
138+
},
139+
events_severity => #{
140+
%% Was 'info'
141+
['service handler result', error, business] => warning
129142
}
130143
}
131144
}}
@@ -150,6 +163,10 @@
150163
event_handler_opts => #{
151164
formatter_opts => #{
152165
max_length => 1000
166+
},
167+
events_severity => #{
168+
%% Was 'info'
169+
['service handler result', error, business] => warning
153170
}
154171
}
155172
}}

rebar.config

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
{erl_health, {git, "https://github.com/valitydev/erlang-health.git", {branch, "master"}}},
4141
{fault_detector_proto, {git, "https://github.com/valitydev/fault-detector-proto.git", {branch, "master"}}},
4242
{limiter_proto, {git, "https://github.com/valitydev/limiter-proto.git", {branch, "master"}}},
43+
{prometheus, "4.8.1"},
44+
{prometheus_cowboy, "0.1.8"},
4345

4446
%% OpenTelemetry deps
4547
{opentelemetry_api, "1.2.1"},
@@ -72,12 +74,6 @@
7274
{profiles, [
7375
{prod, [
7476
{deps, [
75-
%% NOTE
76-
%% Because of a dependency conflict, prometheus libs are only included in production build for now
77-
%% https://github.com/project-fifo/rebar3_lint/issues/42
78-
%% https://github.com/valitydev/hellgate/pull/2/commits/884724c1799703cee4d1033850fe32c17f986d9e
79-
{prometheus, "4.8.1"},
80-
{prometheus_cowboy, "0.1.8"},
8177
% for introspection on production
8278
{recon, "2.5.2"},
8379
{logger_logstash_formatter,
@@ -92,8 +88,6 @@
9288
{tools, load},
9389
{opentelemetry, temporary},
9490
logger_logstash_formatter,
95-
prometheus,
96-
prometheus_cowboy,
9791
sasl,
9892
hellgate
9993
]},
@@ -111,7 +105,7 @@
111105
]}
112106
]}.
113107

114-
{plugins, [
108+
{project_plugins, [
115109
{covertool, "2.0.4"},
116110
{erlfmt, "1.0.0"},
117111
{rebar3_lint, "1.0.1"},
@@ -136,3 +130,11 @@
136130
"ct.coverdata"
137131
]}
138132
]}.
133+
134+
%% NOTE
135+
%% These overrides are needed for the rebar3 lint plugin to work
136+
{overrides, [
137+
{del, accept, [{plugins, [{rebar3_archive_plugin, "0.0.2"}]}]},
138+
{del, prometheus_cowboy, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]},
139+
{del, prometheus_httpd, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]}
140+
]}.

0 commit comments

Comments
 (0)