Skip to content

Commit ab079be

Browse files
author
wuxianrong
committed
added metrics for P99 and slow command statistics
1 parent 6dbb59f commit ab079be

File tree

15 files changed

+301
-4
lines changed

15 files changed

+301
-4
lines changed

CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,24 @@ ExternalProject_Add(rocksdb
630630
make -j${CPU_CORE}
631631
)
632632

633+
# Download and build prometheus-cpp as static libraries.
# Push gateway support, compression and the upstream unit tests are disabled:
# none of them are needed to produce the core/pull archives, and leaving
# ENABLE_TESTING at its default would also build the bundled googletest.
ExternalProject_Add(prometheus_cpp
  URL
  https://github.com/jupp0r/prometheus-cpp/releases/download/v1.2.4/prometheus-cpp-with-submodules.tar.gz
  CMAKE_ARGS
  -DBUILD_SHARED_LIBS=OFF
  -DENABLE_PUSH=OFF
  -DENABLE_COMPRESSION=OFF
  -DENABLE_TESTING=OFF
  -DCMAKE_INSTALL_LIBDIR=${INSTALL_LIBDIR}
  -DCMAKE_INSTALL_INCLUDEDIR=${INSTALL_INCLUDEDIR}
  BUILD_ALWAYS
  1
  BUILD_COMMAND
  make -j${CPU_CORE}
)
647+
648+
set(PROMETHEUS_CPP_CORE_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-core.a)
649+
set(PROMETHEUS_CPP_PULL_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-pull.a)
650+
633651
ExternalProject_Add(rediscache
634652
URL
635653
https://github.com/pikiwidb/rediscache/archive/refs/tags/v1.0.7.tar.gz
@@ -822,6 +840,8 @@ target_link_libraries(${PROJECT_NAME}
822840
${LIB_PROTOBUF}
823841
${LIB_GFLAGS}
824842
${LIB_FMT}
843+
${PROMETHEUS_CPP_PULL_LIB}
844+
${PROMETHEUS_CPP_CORE_LIB}
825845
libsnappy.a
826846
libzstd.a
827847
liblz4.a

include/pika_admin.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ class InfoCmd : public Cmd {
267267
kInfoAll,
268268
kInfoDebug,
269269
kInfoCommandStats,
270+
kInfoSlowCommand,
271+
kInfoCommandP99,
270272
kInfoCache
271273
};
272274
InfoCmd(const std::string& name, int arity, uint32_t flag) : Cmd(name, arity, flag) {}
@@ -294,6 +296,8 @@ class InfoCmd : public Cmd {
294296
const static std::string kRocksDBSection;
295297
const static std::string kDebugSection;
296298
const static std::string kCommandStatsSection;
299+
const static std::string kCommandP99Section;
300+
const static std::string kSlowCommandSection;
297301
const static std::string kCacheSection;
298302

299303
void DoInitial() override;
@@ -314,6 +318,8 @@ class InfoCmd : public Cmd {
314318
void InfoRocksDB(std::string& info);
315319
void InfoDebug(std::string& info);
316320
void InfoCommandStats(std::string& info);
321+
void InfoCommandP99(std::string& info);
322+
void InfoSlowCommand(std::string& info);
317323
void InfoCache(std::string& info, std::shared_ptr<DB> db);
318324

319325
std::string CacheStatusToString(int status);

include/pika_client_conn.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class PikaClientConn : public net::RedisConn {
130130
std::shared_ptr<Cmd> DoCmd(const PikaCmdArgsType& argv, const std::string& opt,
131131
const std::shared_ptr<std::string>& resp_ptr, bool cache_miss_in_rtc);
132132

133-
void ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr);
133+
void ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr, const std::string& opt);
134134
void ProcessMonitor(const PikaCmdArgsType& argv);
135135

136136
void ExecRedisCmd(const PikaCmdArgsType& argv, std::shared_ptr<std::string>& resp_ptr, bool cache_miss_in_rtc);

include/pika_cmd_table_manager.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,17 @@
88

99
#include <shared_mutex>
1010
#include <thread>
11+
#include <prometheus/exposer.h>
12+
#include <prometheus/registry.h>
13+
#include <prometheus/counter.h>
14+
#include <prometheus/histogram.h>
1115

1216
#include "include/acl.h"
1317
#include "include/pika_command.h"
1418
#include "include/pika_data_distribution.h"
1519

20+
using namespace prometheus;
21+
1622
struct CommandStatistics {
1723
CommandStatistics() = default;
1824
CommandStatistics(const CommandStatistics& other) {
@@ -23,6 +29,23 @@ struct CommandStatistics {
2329
std::atomic<uint64_t> cmd_time_consuming = 0;
2430
};
2531

32+
struct HistogramData {
33+
std::shared_ptr<prometheus::Registry> registry;
34+
prometheus::Family<prometheus::Histogram>* family;
35+
std::unordered_map<std::string, prometheus::Histogram*> histograms;
36+
37+
HistogramData() {
38+
registry = std::make_shared<prometheus::Registry>();
39+
family = &prometheus::BuildHistogram()
40+
.Name("pika_command_duration_seconds")
41+
.Help("Execution time of Pika commands in seconds")
42+
.Register(*registry);
43+
}
44+
45+
HistogramData(const HistogramData&) = delete;
46+
HistogramData& operator=(const HistogramData&) = delete;
47+
};
48+
2649
class PikaCmdTableManager {
2750
friend AclSelector;
2851

@@ -31,6 +54,7 @@ class PikaCmdTableManager {
3154
virtual ~PikaCmdTableManager() = default;
3255
void InitCmdTable(void);
3356
void RenameCommand(const std::string before, const std::string after);
57+
void InitHistograms();
3458
std::shared_ptr<Cmd> GetCmd(const std::string& opt);
3559
bool CmdExist(const std::string& cmd) const;
3660
CmdTable* GetCmdTable();
@@ -42,6 +66,11 @@ class PikaCmdTableManager {
4266
* Info Commandstats used
4367
*/
4468
std::unordered_map<std::string, CommandStatistics>* GetCommandStatMap();
69+
std::unordered_map<std::string, CommandStatistics> GetSlowCommandCount();
70+
void UpdateSlowCommandCount(const std::string& opt);
71+
void ResetCommandCount();
72+
prometheus::Histogram& GetHistogram(const std::string& opt);
73+
std::shared_ptr<HistogramData> GetHistogramsData();
4574

4675
private:
4776
std::shared_ptr<Cmd> NewCommand(const std::string& opt);
@@ -60,5 +89,9 @@ class PikaCmdTableManager {
6089
* Info Commandstats used
6190
*/
6291
std::unordered_map<std::string, CommandStatistics> cmdstat_map_;
92+
std::unordered_map<std::string, CommandStatistics> slow_command_count_;
93+
std::shared_mutex slow_command_mutex_;
94+
std::mutex data_mutex_;
95+
std::shared_ptr<HistogramData> data_;
6396
};
6497
#endif

include/pika_server.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ class PikaServer : public pstd::noncopyable {
259259
void ResetStat();
260260
void incr_accumulative_connections();
261261
void ResetLastSecQuerynum();
262+
void ResetCommandCount();
262263
void UpdateQueryNumAndExecCountDB(const std::string& db_name, const std::string& command, bool is_write);
263264
std::unordered_map<std::string, uint64_t> ServerExecCountDB();
264265
std::unordered_map<std::string, QpsStatistic> ServerAllDBStat();

src/pika_admin.cc

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,8 @@ const std::string InfoCmd::kKeyspaceSection = "keyspace";
882882
const std::string InfoCmd::kDataSection = "data";
883883
const std::string InfoCmd::kRocksDBSection = "rocksdb";
884884
const std::string InfoCmd::kDebugSection = "debug";
885+
const std::string InfoCmd::kCommandP99Section = "commandp99";
886+
const std::string InfoCmd::kSlowCommandSection = "slowcommand";
885887
const std::string InfoCmd::kCommandStatsSection = "commandstats";
886888
const std::string InfoCmd::kCacheSection = "cache";
887889

@@ -967,6 +969,10 @@ void InfoCmd::DoInitial() {
967969
info_section_ = kInfoDebug;
968970
} else if (strcasecmp(argv_[1].data(), kCommandStatsSection.data()) == 0) {
969971
info_section_ = kInfoCommandStats;
972+
} else if (strcasecmp(argv_[1].data(), kCommandP99Section.data()) == 0) {
973+
info_section_ = kInfoCommandP99;
974+
} else if (strcasecmp(argv_[1].data(), kSlowCommandSection.data()) == 0) {
975+
info_section_ = kInfoSlowCommand;
970976
} else if (strcasecmp(argv_[1].data(), kCacheSection.data()) == 0) {
971977
info_section_ = kInfoCache;
972978
} else {
@@ -1008,6 +1014,10 @@ void InfoCmd::Do() {
10081014
info.append("\r\n");
10091015
InfoCommandStats(info);
10101016
info.append("\r\n");
1017+
InfoCommandP99(info);
1018+
info.append("\r\n");
1019+
InfoSlowCommand(info);
1020+
info.append("\r\n");
10111021
InfoCache(info, db_);
10121022
info.append("\r\n");
10131023
InfoCPU(info);
@@ -1051,6 +1061,12 @@ void InfoCmd::Do() {
10511061
case kInfoCommandStats:
10521062
InfoCommandStats(info);
10531063
break;
1064+
case kInfoCommandP99:
1065+
InfoCommandP99(info);
1066+
break;
1067+
case kInfoSlowCommand:
1068+
InfoSlowCommand(info);
1069+
break;
10541070
case kInfoCache:
10551071
InfoCache(info, db_);
10561072
break;
@@ -1499,6 +1515,73 @@ void InfoCmd::InfoCommandStats(std::string& info) {
14991515
info.append(tmp_stream.str());
15001516
}
15011517

1518+
void InfoCmd::InfoCommandP99(std::string& info) {
1519+
std::stringstream tmp_stream;
1520+
tmp_stream.precision(2);
1521+
tmp_stream.setf(std::ios::fixed);
1522+
tmp_stream << "# Commands P99" << "\r\n";
1523+
auto data = g_pika_cmd_table_manager->GetHistogramsData();
1524+
auto* histogram_family = data->family;
1525+
for (const auto& metric_family : histogram_family->Collect()) {
1526+
for (const auto& metric : metric_family.metric) {
1527+
std::string command_name;
1528+
1529+
for (const auto& label : metric.label) {
1530+
if (label.name == "command") {
1531+
command_name = label.value;
1532+
break;
1533+
}
1534+
}
1535+
1536+
double total_count = metric.histogram.sample_count;
1537+
1538+
if (command_name.empty()) {
1539+
tmp_stream << "Command: UNKNOWN\r\n";
1540+
} else {
1541+
tmp_stream << "Command: " << command_name << "\r\n";
1542+
}
1543+
1544+
double tp99_threshold = total_count * 0.99;
1545+
double tp999_threshold = total_count * 0.999;
1546+
double tp9999_threshold = total_count * 0.9999;
1547+
double tp99 = 0, tp999 = 0, tp9999 = 0;
1548+
1549+
for (const auto& bucket : metric.histogram.bucket) {
1550+
if (bucket.cumulative_count >= tp99_threshold && tp99 == 0) {
1551+
tp99 = bucket.upper_bound;
1552+
}
1553+
if (bucket.cumulative_count >= tp999_threshold && tp999 == 0) {
1554+
tp999 = bucket.upper_bound;
1555+
}
1556+
if (bucket.cumulative_count >= tp9999_threshold && tp9999 == 0) {
1557+
tp9999 = bucket.upper_bound;
1558+
break;
1559+
}
1560+
}
1561+
tmp_stream << "TP99 ms: " << tp99 << "\r\n";
1562+
tmp_stream << "TP999 ms: " << tp999 << "\r\n";
1563+
tmp_stream << "TP9999 ms: " << tp9999 << "\r\n";
1564+
tmp_stream << "----------------------\r\n";
1565+
}
1566+
}
1567+
1568+
info.append(tmp_stream.str());
1569+
}
1570+
1571+
void InfoCmd::InfoSlowCommand(std::string& info) {
1572+
std::stringstream tmp_stream;
1573+
tmp_stream.precision(2);
1574+
tmp_stream.setf(std::ios::fixed);
1575+
auto stats = g_pika_cmd_table_manager->GetSlowCommandCount();
1576+
tmp_stream << "# SlowCommand Count" << "\r\n";
1577+
for (auto iter : stats) {
1578+
if (iter.second.cmd_count != 0) {
1579+
tmp_stream << iter.first << ":slow_count=" << iter.second.cmd_count << "\r\n";
1580+
}
1581+
}
1582+
info.append(tmp_stream.str());
1583+
}
1584+
15021585
void InfoCmd::InfoCache(std::string& info, std::shared_ptr<DB> db) {
15031586
std::stringstream tmp_stream;
15041587
tmp_stream << "# Cache" << "\r\n";

src/pika_client_conn.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
#include <glog/logging.h>
88
#include <utility>
99
#include <vector>
10-
10+
#include <prometheus/exposer.h>
11+
#include <prometheus/registry.h>
12+
#include <prometheus/counter.h>
13+
#include <prometheus/histogram.h>
1114
#include "include/pika_admin.h"
1215
#include "include/pika_client_conn.h"
1316
#include "include/pika_cmd_table_manager.h"
@@ -221,19 +224,21 @@ std::shared_ptr<Cmd> PikaClientConn::DoCmd(const PikaCmdArgsType& argv, const st
221224
c_ptr->Execute();
222225

223226
time_stat_->process_done_ts_ = pstd::NowMicros();
227+
g_pika_cmd_table_manager->GetHistogram(opt).Observe(time_stat_->total_time() / 1000);
224228
auto cmdstat_map = g_pika_cmd_table_manager->GetCommandStatMap();
225229
(*cmdstat_map)[opt].cmd_count.fetch_add(1);
226230
(*cmdstat_map)[opt].cmd_time_consuming.fetch_add(time_stat_->total_time());
227231

228232
if (g_pika_conf->slowlog_slower_than() >= 0) {
229-
ProcessSlowlog(argv, c_ptr);
233+
ProcessSlowlog(argv, c_ptr, opt);
230234
}
231235

232236
return c_ptr;
233237
}
234238

235-
void PikaClientConn::ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr) {
239+
void PikaClientConn::ProcessSlowlog(const PikaCmdArgsType& argv, std::shared_ptr<Cmd> c_ptr, const std::string& opt) {
236240
if (time_stat_->total_time() > g_pika_conf->slowlog_slower_than()) {
241+
g_pika_cmd_table_manager->UpdateSlowCommandCount(opt);
237242
g_pika_server->SlowlogPushEntry(argv, time_stat_->start_ts() / 1000000, time_stat_->total_time());
238243
if (g_pika_conf->slowlog_write_errorlog()) {
239244
bool trim = false;

src/pika_cmd_table_manager.cc

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,18 @@
1414

1515
extern std::unique_ptr<PikaConf> g_pika_conf;
1616

17+
void PikaCmdTableManager::ResetCommandCount() {
18+
{
19+
std::unique_lock<std::shared_mutex> write_lock(slow_command_mutex_);
20+
slow_command_count_.clear();
21+
}
22+
std::atomic_store(&data_, std::make_shared<HistogramData>());
23+
}
24+
1725
// Creates an empty command table with room reserved for 300 entries and
// installs an initial, empty HistogramData.
PikaCmdTableManager::PikaCmdTableManager() {
  auto table = std::make_unique<CmdTable>();
  table->reserve(300);
  cmds_ = std::move(table);

  auto initial_data = std::make_shared<HistogramData>();
  std::atomic_store(&data_, initial_data);
}
2130

2231
void PikaCmdTableManager::InitCmdTable(void) {
@@ -63,6 +72,50 @@ void PikaCmdTableManager::RenameCommand(const std::string before, const std::str
6372
}
6473
}
6574

75+
// Returns the duration histogram labelled {"command": opt} from the current
// HistogramData, creating and caching it on first use.
//
// The lookup and the insertion both run under data_mutex_. The previous
// version searched `histograms` without any lock while another thread could be
// inserting into the same unordered_map, which is a data race, and it did not
// re-check the cache after acquiring the lock.
//
// NOTE(review): the returned reference points into the HistogramData snapshot
// loaded here; if ResetCommandCount() swaps data_ and the old snapshot is
// released before the caller uses the reference, it dangles. Fixing that
// requires changing the return type, so it is only flagged here.
prometheus::Histogram& PikaCmdTableManager::GetHistogram(const std::string& opt) {
  auto current_data = std::atomic_load(&data_);

  std::lock_guard<std::mutex> lock(data_mutex_);
  auto it = current_data->histograms.find(opt);
  if (it != current_data->histograms.end()) {
    return *(it->second);
  }

  // Bucket upper bounds; callers in this file observe durations in milliseconds.
  auto& new_histogram = current_data->family->Add(
      {{"command", opt}},
      prometheus::Histogram::BucketBoundaries{0.5, 1, 2, 3, 5, 7, 10, 15, 20, 30, 40, 50, 65, 75, 85, 100, 125, 140, 150, 160, 175, 185, 200, 300, 400, 500, 750, 1000, 2000, 5000, 10000}
  );
  current_data->histograms.emplace(opt, &new_histogram);
  return new_histogram;
}
92+
93+
std::shared_ptr<HistogramData> PikaCmdTableManager::GetHistogramsData() {
94+
return std::atomic_load(&data_);
95+
}
96+
97+
void PikaCmdTableManager::UpdateSlowCommandCount(const std::string& opt) {
98+
{
99+
std::shared_lock<std::shared_mutex> read_lock(slow_command_mutex_);
100+
if (slow_command_count_.find(opt) != slow_command_count_.end()) {
101+
slow_command_count_[opt].cmd_count.fetch_add(1);
102+
return;
103+
}
104+
}
105+
106+
{
107+
std::unique_lock<std::shared_mutex> write_lock(slow_command_mutex_);
108+
slow_command_count_[opt];
109+
}
110+
111+
slow_command_count_[opt].cmd_count.fetch_add(1);
112+
}
113+
114+
std::unordered_map<std::string, CommandStatistics> PikaCmdTableManager::GetSlowCommandCount() {
115+
std::shared_lock<std::shared_mutex> lock(slow_command_mutex_);
116+
return slow_command_count_;
117+
}
118+
66119
std::unordered_map<std::string, CommandStatistics>* PikaCmdTableManager::GetCommandStatMap() {
67120
return &cmdstat_map_;
68121
}

src/pika_server.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,16 @@ void PikaServer::ResetLastSecQuerynum() {
10331033
statistic_.ResetDBLastSecQuerynum();
10341034
}
10351035

1036+
void PikaServer::ResetCommandCount() {
1037+
thread_local uint64_t last_reset_time = 0;
1038+
auto current_time = pstd::NowMicros();
1039+
if (current_time - last_reset_time < 60 * 1000 * 1000) {
1040+
return;
1041+
}
1042+
last_reset_time = current_time;
1043+
g_pika_cmd_table_manager->ResetCommandCount();
1044+
}
1045+
10361046
void PikaServer::UpdateQueryNumAndExecCountDB(const std::string& db_name, const std::string& command, bool is_write) {
10371047
std::string cmd(command);
10381048
statistic_.server_stat.qps.querynum++;
@@ -1139,6 +1149,8 @@ void PikaServer::DoTimingTask() {
11391149
ResetLastSecQuerynum();
11401150
// Auto update network instantaneous metric
11411151
AutoUpdateNetworkMetric();
1152+
// Reset command statistics
1153+
ResetCommandCount();
11421154
ProcessCronTask();
11431155
UpdateCacheInfo();
11441156
// Print the queue status periodically

0 commit comments

Comments
 (0)