Skip to content

Commit 132cdf0

Browse files
authored
lotus metrics (#533)
Signed-off-by: turuslan <[email protected]>
1 parent b285318 commit 132cdf0

File tree

11 files changed

+279
-6
lines changed

11 files changed

+279
-6
lines changed

cmake/dependencies.cmake

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,6 @@ find_package(RapidJSON CONFIG REQUIRED)
7171
# https://thalhammer.it/projects/jwt_cpp
7272
hunter_add_package(jwt-cpp)
7373
find_package(jwt-cpp CONFIG REQUIRED)
74+
75+
hunter_add_package(prometheus-cpp)
76+
find_package(prometheus-cpp CONFIG REQUIRED)

core/common/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,11 @@ target_link_libraries(outcome
3333
p2p::p2p
3434
)
3535

36+
add_library(prometheus INTERFACE)
37+
target_link_libraries(prometheus INTERFACE
38+
prometheus-cpp::core
39+
)
40+
3641
add_library(logger
3742
logger.cpp
3843
)

core/common/prometheus/metrics.hpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#pragma once
7+
8+
#include <prometheus/histogram.h>
9+
#include <prometheus/registry.h>
10+
11+
namespace fc {
12+
inline auto &prometheusRegistry() {
13+
static prometheus::Registry x;
14+
return x;
15+
}
16+
17+
/**
 * Default histogram bucket boundaries for durations measured in milliseconds.
 *
 * The values are strictly increasing, from 0.01 up to 100000.
 *
 * Declared `inline` because this constant lives in a header and is passed by
 * reference from inline functions: without it every translation unit would
 * get its own internal-linkage copy, and inline functions that odr-use it
 * would refer to a different object in each translation unit.
 */
inline constexpr std::initializer_list<double> kDefaultPrometheusMsBuckets{
    0.01, 0.05, 0.1,  0.3,  0.6,  0.8,  1,     2,     3,     4,      5,
    6,    8,    10,   13,   16,   20,   25,    30,    40,    50,     65,
    80,   100,  130,  160,  200,  250,  300,   400,   500,   650,    800,
    1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000, 100000,
};
23+
} // namespace fc

core/common/prometheus/rpc.hpp

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
/**
2+
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#pragma once
7+
8+
#include "api/rpc/rpc.hpp"
9+
#include "common/prometheus/metrics.hpp"
10+
#include "common/prometheus/since.hpp"
11+
12+
namespace fc::api::rpc {
13+
inline auto &metricApiTime() {
14+
static auto &x{prometheus::BuildHistogram()
15+
.Name("lotus_api_request_duration_ms")
16+
.Help("Duration of API requests")
17+
.Register(prometheusRegistry())};
18+
return x;
19+
}
20+
21+
/**
 * Wraps an RPC method so that the time it takes to respond is recorded.
 *
 * The returned method forwards all arguments to `f`, but replaces the
 * `respond` callback with one that first observes the elapsed milliseconds
 * in the metricApiTime() histogram family (label "endpoint" = `name`, buckets
 * kDefaultPrometheusMsBuckets) and then calls the original `respond`.
 * The timer starts when the wrapped method is invoked.
 *
 * @param name endpoint name used as the "endpoint" label value
 * @param f method to wrap
 * @return method with the same call signature that also records its duration
 */
inline Method metricApiTime(std::string name, Method f) {
  return [name{std::move(name)}, f{std::move(f)}](
             const Value &value,
             Respond respond,
             MakeChan make_chan,
             Send send,
             const Permissions &permissions) {
    // The wrapper may be invoked many times, so every invocation gives the
    // response callback its own copy of the endpoint name. The captured
    // `name` is const here, so it is copied explicitly rather than "moved".
    auto timed_respond{
        [endpoint{name}, respond{std::move(respond)}, since{Since{}}](
            auto &&result) {
          const auto elapsed_ms{since.ms()};
          metricApiTime()
              .Add({{"endpoint", endpoint}}, kDefaultPrometheusMsBuckets)
              .Observe(elapsed_ms);
          // Preserve the value category the caller used for the result.
          respond(std::forward<decltype(result)>(result));
        }};
    f(value,
      std::move(timed_respond),
      std::move(make_chan),
      std::move(send),
      permissions);
  };
}
43+
44+
/**
 * Replaces every method stored in `rpc.ms` with a wrapper that records its
 * response time, using the map key as the endpoint name.
 *
 * @param rpc RPC method table to instrument in place
 */
inline void metricApiTime(Rpc &rpc) {
  for (auto &[name, method] : rpc.ms) {
    // The old handler is only needed inside the wrapper, so move it there
    // instead of copying it right before it gets overwritten.
    method = metricApiTime(name, std::move(method));
  }
}
49+
} // namespace fc::api::rpc

core/common/prometheus/since.hpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#pragma once
7+
8+
#include <chrono>
9+
10+
namespace fc {
11+
/**
 * Stopwatch that measures the time elapsed since it was created.
 */
struct Since {
  using Clock = std::chrono::steady_clock;

  /// Moment the measurement started; defaults to the time of construction.
  Clock::time_point start{Clock::now()};

  /**
   * Time elapsed since `start`, in milliseconds.
   * @tparam T representation of the returned count (double by default)
   * @return millisecond count converted to T
   */
  template <typename T = double>
  T ms() const {
    using Millis = std::chrono::duration<T, std::milli>;
    const auto elapsed{Clock::now() - start};
    return std::chrono::duration_cast<Millis>(elapsed).count();
  }
};
23+
} // namespace fc

core/node/main/main.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "common/libp2p/peer/peer_info_helper.hpp"
1818
#include "common/libp2p/soralog.hpp"
1919
#include "common/logger.hpp"
20+
#include "common/prometheus/rpc.hpp"
2021
#include "drand/impl/http.hpp"
2122
#include "markets/storage/types.hpp"
2223
#include "node/blocksync_server.hpp"
@@ -209,6 +210,9 @@ namespace fc {
209210
*node_objects.api,
210211
std::bind(node_objects.api->AuthVerify, std::placeholders::_1))};
211212

213+
metricApiTime(*rpc_v1);
214+
metricApiTime(*rpc);
215+
212216
std::map<std::string, std::shared_ptr<api::Rpc>> rpcs;
213217
rpcs.emplace("/rpc/v0", rpc_v1);
214218
rpcs.emplace("/rpc/v1", rpc);

core/node/main/metrics.hpp

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,15 @@
55

66
#pragma once
77

8+
#include <prometheus/text_serializer.h>
89
#include <libp2p/common/metrics/instance_count.hpp>
910
#include <sstream>
1011

1112
#include "clock/chain_epoch_clock.hpp"
1213
#include "clock/utc_clock.hpp"
1314
#include "common/fd_usage.hpp"
1415
#include "common/memory_usage.hpp"
16+
#include "common/prometheus/metrics.hpp"
1517
#include "node/events.hpp"
1618
#include "node/main/builder.hpp"
1719
#include "node/sync_job.hpp"
@@ -27,9 +29,19 @@ namespace fc::node {
2729
}
2830

2931
std::string prometheus() const {
30-
std::stringstream ss;
31-
auto metric{[&](auto &&name, auto &&value) {
32-
ss << name << ' ' << value << std::endl;
32+
auto families{prometheusRegistry().Collect()};
33+
using ::prometheus::MetricType;
34+
auto manual{[&](MetricType type,
35+
std::string name,
36+
std::string help) -> ::prometheus::ClientMetric & {
37+
auto &family{families.emplace_back()};
38+
family.name = std::move(name);
39+
family.help = std::move(help);
40+
family.type = type;
41+
return family.metric.emplace_back();
42+
}};
43+
auto metric{[&](std::string name, double value) {
44+
manual(MetricType::Untyped, std::move(name), "").untyped.value = value;
3345
}};
3446

3547
metric("uptime",
@@ -52,8 +64,9 @@ namespace fc::node {
5264
metric("height_attached",
5365
std::max(height_head, o.sync_job->metricAttachedHeight()));
5466
metric("height_known", std::max(height_head, height_known.load()));
55-
metric("height_expected",
56-
o.chain_epoch_clock->epochAtTime(o.utc_clock->nowUTC()).value());
67+
const auto height_expected{
68+
o.chain_epoch_clock->epochAtTime(o.utc_clock->nowUTC()).value()};
69+
metric("height_expected", height_expected);
5770

5871
auto car{[&](auto _size, auto _count, auto _tmp, auto &ipld) {
5972
uint64_t size{};
@@ -85,7 +98,20 @@ namespace fc::node {
8598
}
8699
instances_lock.unlock();
87100

88-
return ss.str();
101+
manual(MetricType::Gauge,
102+
"lotus_chain_node_height",
103+
"Current Height of the node")
104+
.gauge.value = height_head;
105+
manual(MetricType::Gauge,
106+
"lotus_chain_node_height_expected",
107+
"Expected Height of the node")
108+
.gauge.value = height_expected;
109+
manual(MetricType::Gauge,
110+
"lotus_chain_node_worker_height",
111+
"Height of workers on the node")
112+
.gauge.value = height_head;
113+
114+
return ::prometheus::TextSerializer{}.Serialize(families);
89115
}
90116

91117
const NodeObjects &o;

core/node/pubsub_gate.cpp

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
#include "codec/cbor/cbor_codec.hpp"
99
#include "common/logger.hpp"
10+
#include "common/prometheus/metrics.hpp"
1011
#include "primitives/block/block.hpp"
1112
#include "primitives/cid/cid_of_cbor.hpp"
1213

@@ -85,6 +86,14 @@ namespace fc::sync {
8586
if (!gossip_->publish(blocks_topic_, buffer)) {
8687
log()->warn("cannot publish block");
8788
}
89+
90+
static auto &metric{prometheus::BuildCounter()
91+
.Name("lotus_block_published")
92+
.Help("Counter for total locally published blocks")
93+
.Register(prometheusRegistry())
94+
.Add({})};
95+
metric.Increment();
96+
8897
return outcome::success();
8998
}
9099

@@ -93,6 +102,14 @@ namespace fc::sync {
93102
Bytes{codec::cbor::encode(msg).value()})) {
94103
log()->warn("cannot publish message");
95104
}
105+
106+
static auto &metric{
107+
prometheus::BuildCounter()
108+
.Name("lotus_message_published")
109+
.Help("Counter for total locally published messages")
110+
.Register(prometheusRegistry())
111+
.Add({})};
112+
metric.Increment();
96113
}
97114

98115
bool PubSubGate::onBlock(const PeerId &from, const Bytes &raw) {
@@ -102,6 +119,13 @@ namespace fc::sync {
102119

103120
// TODO validate
104121

122+
static auto &metric{prometheus::BuildCounter()
123+
.Name("lotus_block_received")
124+
.Help("Counter for total received blocks")
125+
.Register(prometheusRegistry())
126+
.Add({})};
127+
metric.Increment();
128+
105129
events_->signalBlockFromPubSub(
106130
events::BlockFromPubSub{from, CbCid::hash(cbor), std::move(bm)});
107131

@@ -129,6 +153,13 @@ namespace fc::sync {
129153
} else {
130154
auto res = codec::cbor::decode<primitives::block::SignedMessage>(raw);
131155
if (res) {
156+
static auto &metric{prometheus::BuildCounter()
157+
.Name("lotus_message_received")
158+
.Help("Counter for total received messages")
159+
.Register(prometheusRegistry())
160+
.Add({})};
161+
metric.Increment();
162+
132163
events_->signalMessageFromPubSub(events::MessageFromPubSub{
133164
from,
134165
std::move(cid_res.value()),

core/vm/interpreter/impl/interpreter_impl.cpp

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77

88
#include <utility>
99

10+
#include "common/prometheus/metrics.hpp"
11+
#include "common/prometheus/since.hpp"
1012
#include "const.hpp"
1113
#include "primitives/tipset/load.hpp"
1214
#include "vm/actor/builtin/v0/cron/cron_actor.hpp"
@@ -70,6 +72,61 @@ namespace fc::vm::interpreter {
7072
std::vector<MessageReceipt> *all_receipts) const {
7173
const auto &ipld{env_context_.ipld};
7274

75+
static auto &metricFailure{
76+
prometheus::BuildCounter()
77+
.Name("lotus_block_failure")
78+
.Help("Counter for block validation failures")
79+
.Register(prometheusRegistry())
80+
.Add({})};
81+
static auto &metricSuccess{
82+
prometheus::BuildCounter()
83+
.Name("lotus_block_success")
84+
.Help("Counter for block validation successes")
85+
.Register(prometheusRegistry())
86+
.Add({})};
87+
static auto &metricTotal{prometheus::BuildHistogram()
88+
.Name("lotus_vm_applyblocks_total_ms")
89+
.Help("Time spent applying block state")
90+
.Register(prometheusRegistry())
91+
.Add({}, kDefaultPrometheusMsBuckets)};
92+
static auto &metricMessages{prometheus::BuildHistogram()
93+
.Name("lotus_vm_applyblocks_messages")
94+
.Help("Time spent applying block messages")
95+
.Register(prometheusRegistry())
96+
.Add({}, kDefaultPrometheusMsBuckets)};
97+
static auto &metricEarly{
98+
prometheus::BuildHistogram()
99+
.Name("lotus_vm_applyblocks_early")
100+
.Help("Time spent in early apply-blocks (null cron, upgrades)")
101+
.Register(prometheusRegistry())
102+
.Add({}, kDefaultPrometheusMsBuckets)};
103+
static auto &metricCron{prometheus::BuildHistogram()
104+
.Name("lotus_vm_applyblocks_cron")
105+
.Help("Time spent in cron")
106+
.Register(prometheusRegistry())
107+
.Add({}, kDefaultPrometheusMsBuckets)};
108+
static auto &metricFlush{prometheus::BuildHistogram()
109+
.Name("lotus_vm_applyblocks_flush")
110+
.Help("Time spent flushing vm state")
111+
.Register(prometheusRegistry())
112+
.Add({}, kDefaultPrometheusMsBuckets)};
113+
114+
bool success{false};
115+
const Since since;
116+
std::pair<::prometheus::Histogram *, Since> last_step;
117+
auto nextStep{[&](auto metric) {
118+
if (last_step.first) {
119+
last_step.first->Observe(last_step.second.ms());
120+
}
121+
last_step = std::make_pair(metric, Since{});
122+
}};
123+
auto BOOST_OUTCOME_TRY_UNIQUE_NAME{gsl::finally([&] {
124+
metricTotal.Observe(since.ms());
125+
nextStep(nullptr);
126+
(success ? metricSuccess : metricFailure).Increment();
127+
})};
128+
nextStep(&metricEarly);
129+
73130
auto on_receipt{[&](auto &receipt) {
74131
if (all_receipts) {
75132
all_receipts->push_back(receipt);
@@ -111,6 +168,8 @@ namespace fc::vm::interpreter {
111168
env->setHeight(tipset->height());
112169
}
113170

171+
nextStep(&metricMessages);
172+
114173
adt::Array<MessageReceipt> receipts{ipld};
115174
MessageVisitor message_visitor{ipld, true, true};
116175
for (const auto &block : tipset->blks) {
@@ -147,15 +206,21 @@ namespace fc::vm::interpreter {
147206
on_receipt(receipt);
148207
}
149208

209+
nextStep(&metricCron);
210+
150211
OUTCOME_TRY(cron());
151212

213+
nextStep(&metricFlush);
214+
152215
OUTCOME_TRY(new_state_root, env->state_tree->flush());
153216
OUTCOME_TRY(env->ipld->flush(new_state_root));
154217

155218
OUTCOME_TRY(receipts.amt.flush());
156219

157220
OUTCOME_TRY(weight, getWeight(tipset));
158221

222+
success = true;
223+
159224
return Result{new_state_root, receipts.amt.cid(), std::move(weight)};
160225
}
161226

core/vm/runtime/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ target_link_libraries(runtime
2020
ipfs_datastore_error
2121
keystore
2222
message
23+
prometheus
2324
proofs
2425
tipset
2526
signature

0 commit comments

Comments
 (0)