Skip to content

Commit 11fefec

Browse files
jameseh96 authored and daverigby committed
MB-41760: Add disk-failures stats group
Provide ns_server with disk failure stats; these can be fetched frequently with minimal overhead.

    $ cbstats -u ... -p ... localhost:12000 disk-failures
     ep_data_read_failed:  0
     ep_data_write_failed: 0

Change-Id: I90eabc27ccce03fcd2057cb1ca2353bd030b3e5f
Reviewed-on: http://review.couchbase.org/c/kv_engine/+/138633
Tested-by: Build Bot <[email protected]>
Reviewed-by: Richard de Mellow <[email protected]>
1 parent 1778627 commit 11fefec

File tree

3 files changed

+29
-10
lines changed

3 files changed

+29
-10
lines changed

engines/ep/management/cbstats

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,10 @@ def stats_diskinfo(mc, with_detail=None):
794794

795795
stats_formatter(stats_perform(mc, cmd_str))
796796

797+
@cmd
798+
def stats_diskfailures(mc):
799+
stats_formatter(stats_perform(mc, "disk-failures"))
800+
797801
@cmd
798802
def stats_eviction(mc):
799803
if output_json:
@@ -949,6 +953,7 @@ def main():
949953
c.addCommand('checkpoint', stats_checkpoint, 'checkpoint [vbid]')
950954
c.addCommand('config', stats_config, 'config')
951955
c.addCommand('diskinfo', stats_diskinfo, 'diskinfo [detail]')
956+
c.addCommand('disk-failures', stats_diskfailures, 'disk-failures')
952957
c.addCommand('durability-monitor', stats_durability_monitor, 'durability-monitor [vbid]')
953958
c.addCommand('eviction', stats_eviction, 'eviction')
954959
c.addCommand('scheduler', stats_scheduler, 'scheduler')

engines/ep/src/ep_engine.cc

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2965,20 +2965,12 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::doEngineStats(
29652965
collector.addStat(Key::ep_cursors_dropped, epstats.cursorsDropped);
29662966
collector.addStat(Key::ep_cursor_memory_freed, epstats.cursorMemoryFreed);
29672967

2968+
doDiskFailureStats(collector);
2969+
29682970
// Note: These are also reported per-shard in 'kvstore' stats, however
29692971
// we want to be able to graph these over time, and hence need to expose
29702972
// to ns_server at the top-level.
29712973
size_t value = 0;
2972-
if (kvBucket->getKVStoreStat("failure_compaction", value,
2973-
KVBucketIface::KVSOption::BOTH)) {
2974-
// Total data write failures is compaction failures plus commit failures
2975-
auto writeFailure = value + epstats.commitFailed;
2976-
collector.addStat(Key::ep_data_write_failed, writeFailure);
2977-
}
2978-
if (kvBucket->getKVStoreStat("failure_get", value,
2979-
KVBucketIface::KVSOption::BOTH)) {
2980-
collector.addStat(Key::ep_data_read_failed, value);
2981-
}
29822974
if (kvBucket->getKVStoreStat("io_document_write_bytes",
29832975
value,
29842976
KVBucketIface::KVSOption::RW)) {
@@ -4513,6 +4505,22 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::doDiskinfoStats(
45134505
return ENGINE_EINVAL;
45144506
}
45154507

4508+
void EventuallyPersistentEngine::doDiskFailureStats(
4509+
BucketStatCollector& collector) {
4510+
using namespace cb::stats;
4511+
size_t value = 0;
4512+
if (kvBucket->getKVStoreStat(
4513+
"failure_compaction", value, KVBucketIface::KVSOption::BOTH)) {
4514+
// Total data write failures is compaction failures plus commit failures
4515+
auto writeFailure = value + stats.commitFailed;
4516+
collector.addStat(Key::ep_data_write_failed, writeFailure);
4517+
}
4518+
if (kvBucket->getKVStoreStat(
4519+
"failure_get", value, KVBucketIface::KVSOption::BOTH)) {
4520+
collector.addStat(Key::ep_data_read_failed, value);
4521+
}
4522+
}
4523+
45164524
ENGINE_ERROR_CODE EventuallyPersistentEngine::doPrivilegedStats(
45174525
const void* cookie, const AddStatFn& add_stat, std::string_view key) {
45184526
// Privileged stats - need Stats priv (and not just SimpleStats).
@@ -4692,6 +4700,10 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::getStats(
46924700
return doScopeStats(
46934701
cookie, add_stat, std::string(key.data(), key.size()));
46944702
}
4703+
if (cb_isPrefix(key, "disk-failures")) {
4704+
doDiskFailureStats(bucketCollector);
4705+
return ENGINE_SUCCESS;
4706+
}
46954707
if (key[0] == '_') {
46964708
return doPrivilegedStats(cookie, add_stat, key);
46974709
}

engines/ep/src/ep_engine.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,8 @@ class EventuallyPersistentEngine : public EngineIface, public DcpIface {
10101010
const AddStatFn& add_stat,
10111011
std::string_view statKey);
10121012

1013+
void doDiskFailureStats(BucketStatCollector& collector);
1014+
10131015
ENGINE_ERROR_CODE doPrivilegedStats(const void* cookie,
10141016
const AddStatFn& add_stat,
10151017
std::string_view statKey);

0 commit comments

Comments
 (0)