diff --git a/Jenkinsfile b/Jenkinsfile index 7e046f96b9a..2bf09ffc348 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -108,7 +108,8 @@ void fixup_rpmlintrc() { '/usr/bin/hello_drpc', '/usr/bin/daos_firmware', '/usr/bin/daos_admin', - '/usr/bin/daos_server'] + '/usr/bin/daos_server', + '/usr/bin/ddb'] String content = readFile(file: 'utils/rpms/daos.rpmlintrc') + '\n\n' + '# https://daosio.atlassian.net/browse/DAOS-11534\n' diff --git a/ci/codespell.ignores b/ci/codespell.ignores index 7d430aaa4c5..c625d1b17a5 100644 --- a/ci/codespell.ignores +++ b/ci/codespell.ignores @@ -34,3 +34,5 @@ expres signalling laf cacl +chk +falloc diff --git a/debian/changelog b/debian/changelog index 177e5d2410e..162361824c8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +daos (2.5.101-4) unstable; urgency=medium + [ Fan Yong ] + * NOOP change to keep in parity with RPM version + + -- Fan Yong Fri, 05 Apr 2024 09:30:00 +0900 + daos (2.5.101-3) unstable; urgency=medium [ Ashley M. Pittman ] * Updated pydaos install process diff --git a/debian/daos-server-tests.install b/debian/daos-server-tests.install index c12498fb890..ae15938b219 100644 --- a/debian/daos-server-tests.install +++ b/debian/daos-server-tests.install @@ -8,6 +8,7 @@ usr/bin/smd_ut usr/bin/bio_ut usr/bin/vea_ut usr/bin/vos_tests +usr/bin/ddb_tests usr/bin/vea_stress usr/bin/vos_perf usr/bin/obj_ctl diff --git a/debian/daos-server.install b/debian/daos-server.install index e34d8e92ae8..fb1e8af9a67 100644 --- a/debian/daos-server.install +++ b/debian/daos-server.install @@ -8,7 +8,10 @@ usr/bin/daos_server_helper usr/bin/daos_server usr/bin/daos_engine usr/bin/daos_metrics +usr/bin/ddb +usr/lib64/daos_srv/libchk.so usr/lib64/daos_srv/libcont.so +usr/lib64/daos_srv/libddb.so usr/lib64/daos_srv/libdtx.so usr/lib64/daos_srv/libmgmt.so usr/lib64/daos_srv/libobj.so diff --git a/src/SConscript b/src/SConscript index dc96d0303a9..72fce0633a2 100644 --- a/src/SConscript +++ b/src/SConscript @@ -106,6 +106,7 @@ 
def scons(): # Build each DAOS component SConscript('rsvc/SConscript') + SConscript('chk/SConscript') SConscript('mgmt/SConscript') SConscript('pool/SConscript') SConscript('container/SConscript') @@ -128,6 +129,9 @@ def scons(): # Build utilities SConscript('utils/SConscript') + # Build ddb + SConscript('ddb/SConscript') + # Build the control plane components SConscript('control/SConscript') diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index 365e35953e6..297694c6e6a 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -9,8 +9,6 @@ #include "bio_internal.h" #include "bio_wal.h" -#define BIO_BLOB_HDR_MAGIC (0xb0b51ed5) - struct blob_cp_arg { spdk_blob_id bca_id; struct spdk_blob *bca_blob; diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index be01df5d3d6..302c73f6411 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -19,6 +19,7 @@ #include "smd.pb-c.h" +#define BIO_BLOB_HDR_MAGIC (0xb0b51ed5) #define BIO_DMA_PAGE_SHIFT 12 /* 4K */ #define BIO_DMA_PAGE_SZ (1UL << BIO_DMA_PAGE_SHIFT) #define BIO_XS_CNT_MAX BIO_MAX_VOS_TGT_CNT /* Max VOS xstreams per blobstore */ diff --git a/src/cart/crt_corpc.c b/src/cart/crt_corpc.c index 25b96e85178..5d7b6d79c6e 100644 --- a/src/cart/crt_corpc.c +++ b/src/cart/crt_corpc.c @@ -57,8 +57,8 @@ crt_corpc_info_init(struct crt_rpc_priv *rpc_priv, rpc_priv->crp_flags |= CRT_RPC_FLAG_COLL; if (co_info->co_grp_priv->gp_primary) rpc_priv->crp_flags |= CRT_RPC_FLAG_PRIMARY_GRP; - if (flags & CRT_RPC_FLAG_FILTER_INVERT) - rpc_priv->crp_flags |= CRT_RPC_FLAG_FILTER_INVERT; + rpc_priv->crp_flags |= flags & (CRT_RPC_FLAG_FILTER_INVERT | + CRT_RPC_FLAG_CO_FAILOUT); co_hdr->coh_grpid = grp_priv->gp_pub.cg_grpid; co_hdr->coh_filter_ranks = co_info->co_filter_ranks; @@ -906,6 +906,11 @@ crt_corpc_req_hdlr(struct crt_rpc_priv *rpc_priv) } forward_done: + if (rc != 0 && rpc_priv->crp_flags & CRT_RPC_FLAG_CO_FAILOUT) { + crt_corpc_complete(rpc_priv); + goto out; + } + /* NOOP bcast (no child and 
root excluded) */ if (co_info->co_child_num == 0 && co_info->co_root_excluded) crt_corpc_complete(rpc_priv); diff --git a/src/chk/SConscript b/src/chk/SConscript new file mode 100644 index 00000000000..79398dfe3df --- /dev/null +++ b/src/chk/SConscript @@ -0,0 +1,31 @@ +# pylint: disable=consider-using-f-string +# pylint: disable-next=wrong-spelling-in-comment +"""Build check library""" + + +def scons(): + """Execute build""" + Import('env', 'prereqs') + + env.AppendUnique(LIBPATH=[Dir('.')]) + + denv = env.Clone() + + # common + prereqs.require(denv, 'argobots', 'protobufc') + chk_pb = denv.SharedObject(['chk.pb-c.c']) + Export('chk_pb') + + if not prereqs.server_requested(): + return + + # chk + chk = denv.d_library('chk', + [chk_pb, 'chk_srv.c', 'chk_common.c', 'chk_vos.c', + 'chk_rpc.c', 'chk_upcall.c', 'chk_iv.c', 'chk_leader.c', + 'chk_engine.c'], install_off="../..") + denv.Install('$PREFIX/lib64/daos_srv', chk) + + +if __name__ == "SCons.Script": + scons() diff --git a/src/chk/chk.pb-c.c b/src/chk/chk.pb-c.c new file mode 100644 index 00000000000..7550d85834b --- /dev/null +++ b/src/chk/chk.pb-c.c @@ -0,0 +1,595 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! 
*/ +/* Generated from: chk.proto */ + +/* Do not generate deprecated warnings for self */ +#ifndef PROTOBUF_C__NO_DEPRECATED +#define PROTOBUF_C__NO_DEPRECATED +#endif + +#include "chk.pb-c.h" +void chk__check_report__init + (Chk__CheckReport *message) +{ + static const Chk__CheckReport init_value = CHK__CHECK_REPORT__INIT; + *message = init_value; +} +size_t chk__check_report__get_packed_size + (const Chk__CheckReport *message) +{ + assert(message->base.descriptor == &chk__check_report__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t chk__check_report__pack + (const Chk__CheckReport *message, + uint8_t *out) +{ + assert(message->base.descriptor == &chk__check_report__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t chk__check_report__pack_to_buffer + (const Chk__CheckReport *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &chk__check_report__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Chk__CheckReport * + chk__check_report__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Chk__CheckReport *) + protobuf_c_message_unpack (&chk__check_report__descriptor, + allocator, len, data); +} +void chk__check_report__free_unpacked + (Chk__CheckReport *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &chk__check_report__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +static const ProtobufCFieldDescriptor chk__check_report__field_descriptors[18] = +{ + { + "seq", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, seq), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "class", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* 
quantifier_offset */ + offsetof(Chk__CheckReport, class_), + &chk__check_inconsist_class__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "action", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, action), + &chk__check_inconsist_action__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "result", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, result), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "rank", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, rank), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "target", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, target), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "pool_uuid", + 7, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, pool_uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "pool_label", + 8, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, pool_label), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "cont_uuid", + 9, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, cont_uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "cont_label", + 10, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, cont_label), + NULL, + 
&protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "objid", + 11, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, objid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "dkey", + 12, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, dkey), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "akey", + 13, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, akey), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "timestamp", + 14, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, timestamp), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "msg", + 15, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Chk__CheckReport, msg), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "act_choices", + 16, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_ENUM, + offsetof(Chk__CheckReport, n_act_choices), + offsetof(Chk__CheckReport, act_choices), + &chk__check_inconsist_action__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "act_details", + 17, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Chk__CheckReport, n_act_details), + offsetof(Chk__CheckReport, act_details), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "act_msgs", + 18, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Chk__CheckReport, n_act_msgs), + 
offsetof(Chk__CheckReport, act_msgs), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned chk__check_report__field_indices_by_name[] = { + 15, /* field[15] = act_choices */ + 16, /* field[16] = act_details */ + 17, /* field[17] = act_msgs */ + 2, /* field[2] = action */ + 12, /* field[12] = akey */ + 1, /* field[1] = class */ + 9, /* field[9] = cont_label */ + 8, /* field[8] = cont_uuid */ + 11, /* field[11] = dkey */ + 14, /* field[14] = msg */ + 10, /* field[10] = objid */ + 7, /* field[7] = pool_label */ + 6, /* field[6] = pool_uuid */ + 4, /* field[4] = rank */ + 3, /* field[3] = result */ + 0, /* field[0] = seq */ + 5, /* field[5] = target */ + 13, /* field[13] = timestamp */ +}; +static const ProtobufCIntRange chk__check_report__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 18 } +}; +const ProtobufCMessageDescriptor chk__check_report__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "chk.CheckReport", + "CheckReport", + "Chk__CheckReport", + "chk", + sizeof(Chk__CheckReport), + 18, + chk__check_report__field_descriptors, + chk__check_report__field_indices_by_name, + 1, chk__check_report__number_ranges, + (ProtobufCMessageInit) chk__check_report__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue chk__check_inconsist_class__enum_values_by_number[22] = +{ + { "CIC_NONE", "CHK__CHECK_INCONSIST_CLASS__CIC_NONE", 0 }, + { "CIC_POOL_LESS_SVC_WITH_QUORUM", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_LESS_SVC_WITH_QUORUM", 1 }, + { "CIC_POOL_LESS_SVC_WITHOUT_QUORUM", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_LESS_SVC_WITHOUT_QUORUM", 2 }, + { "CIC_POOL_MORE_SVC", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_MORE_SVC", 3 }, + { "CIC_POOL_NONEXIST_ON_MS", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_MS", 4 }, + { "CIC_POOL_NONEXIST_ON_ENGINE", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_ENGINE", 5 }, + { "CIC_POOL_BAD_SVCL", 
"CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_SVCL", 6 }, + { "CIC_POOL_BAD_LABEL", "CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_LABEL", 7 }, + { "CIC_ENGINE_NONEXIST_IN_MAP", "CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_NONEXIST_IN_MAP", 8 }, + { "CIC_ENGINE_DOWN_IN_MAP", "CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_DOWN_IN_MAP", 9 }, + { "CIC_ENGINE_HAS_NO_STORAGE", "CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_HAS_NO_STORAGE", 10 }, + { "CIC_CONT_NONEXIST_ON_PS", "CHK__CHECK_INCONSIST_CLASS__CIC_CONT_NONEXIST_ON_PS", 11 }, + { "CIC_CONT_BAD_LABEL", "CHK__CHECK_INCONSIST_CLASS__CIC_CONT_BAD_LABEL", 12 }, + { "CIC_DTX_CORRUPTED", "CHK__CHECK_INCONSIST_CLASS__CIC_DTX_CORRUPTED", 13 }, + { "CIC_DTX_ORPHAN", "CHK__CHECK_INCONSIST_CLASS__CIC_DTX_ORPHAN", 14 }, + { "CIC_CSUM_LOST", "CHK__CHECK_INCONSIST_CLASS__CIC_CSUM_LOST", 15 }, + { "CIC_CSUM_FAILURE", "CHK__CHECK_INCONSIST_CLASS__CIC_CSUM_FAILURE", 16 }, + { "CIC_OBJ_LOST_REP", "CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_REP", 17 }, + { "CIC_OBJ_LOST_EC_SHARD", "CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_EC_SHARD", 18 }, + { "CIC_OBJ_LOST_EC_DATA", "CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_EC_DATA", 19 }, + { "CIC_OBJ_DATA_INCONSIST", "CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_DATA_INCONSIST", 20 }, + { "CIC_UNKNOWN", "CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN", 100 }, +}; +static const ProtobufCIntRange chk__check_inconsist_class__value_ranges[] = { +{0, 0},{100, 21},{0, 22} +}; +static const ProtobufCEnumValueIndex chk__check_inconsist_class__enum_values_by_name[22] = +{ + { "CIC_CONT_BAD_LABEL", 12 }, + { "CIC_CONT_NONEXIST_ON_PS", 11 }, + { "CIC_CSUM_FAILURE", 16 }, + { "CIC_CSUM_LOST", 15 }, + { "CIC_DTX_CORRUPTED", 13 }, + { "CIC_DTX_ORPHAN", 14 }, + { "CIC_ENGINE_DOWN_IN_MAP", 9 }, + { "CIC_ENGINE_HAS_NO_STORAGE", 10 }, + { "CIC_ENGINE_NONEXIST_IN_MAP", 8 }, + { "CIC_NONE", 0 }, + { "CIC_OBJ_DATA_INCONSIST", 20 }, + { "CIC_OBJ_LOST_EC_DATA", 19 }, + { "CIC_OBJ_LOST_EC_SHARD", 18 }, + { "CIC_OBJ_LOST_REP", 17 }, + { "CIC_POOL_BAD_LABEL", 7 }, 
+ { "CIC_POOL_BAD_SVCL", 6 }, + { "CIC_POOL_LESS_SVC_WITHOUT_QUORUM", 2 }, + { "CIC_POOL_LESS_SVC_WITH_QUORUM", 1 }, + { "CIC_POOL_MORE_SVC", 3 }, + { "CIC_POOL_NONEXIST_ON_ENGINE", 5 }, + { "CIC_POOL_NONEXIST_ON_MS", 4 }, + { "CIC_UNKNOWN", 21 }, +}; +const ProtobufCEnumDescriptor chk__check_inconsist_class__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckInconsistClass", + "CheckInconsistClass", + "Chk__CheckInconsistClass", + "chk", + 22, + chk__check_inconsist_class__enum_values_by_number, + 22, + chk__check_inconsist_class__enum_values_by_name, + 2, + chk__check_inconsist_class__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCEnumValue chk__check_inconsist_action__enum_values_by_number[13] = +{ + { "CIA_DEFAULT", "CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT", 0 }, + { "CIA_INTERACT", "CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT", 1 }, + { "CIA_IGNORE", "CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE", 2 }, + { "CIA_DISCARD", "CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD", 3 }, + { "CIA_READD", "CHK__CHECK_INCONSIST_ACTION__CIA_READD", 4 }, + { "CIA_TRUST_MS", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS", 5 }, + { "CIA_TRUST_PS", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS", 6 }, + { "CIA_TRUST_TARGET", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET", 7 }, + { "CIA_TRUST_MAJORITY", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MAJORITY", 8 }, + { "CIA_TRUST_LATEST", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_LATEST", 9 }, + { "CIA_TRUST_OLDEST", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_OLDEST", 10 }, + { "CIA_TRUST_EC_PARITY", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_PARITY", 11 }, + { "CIA_TRUST_EC_DATA", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA", 12 }, +}; +static const ProtobufCIntRange chk__check_inconsist_action__value_ranges[] = { +{0, 0},{0, 13} +}; +static const ProtobufCEnumValueIndex chk__check_inconsist_action__enum_values_by_name[13] = +{ + { "CIA_DEFAULT", 0 }, + { "CIA_DISCARD", 3 }, + { "CIA_IGNORE", 2 }, + { 
"CIA_INTERACT", 1 }, + { "CIA_READD", 4 }, + { "CIA_TRUST_EC_DATA", 12 }, + { "CIA_TRUST_EC_PARITY", 11 }, + { "CIA_TRUST_LATEST", 9 }, + { "CIA_TRUST_MAJORITY", 8 }, + { "CIA_TRUST_MS", 5 }, + { "CIA_TRUST_OLDEST", 10 }, + { "CIA_TRUST_PS", 6 }, + { "CIA_TRUST_TARGET", 7 }, +}; +const ProtobufCEnumDescriptor chk__check_inconsist_action__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckInconsistAction", + "CheckInconsistAction", + "Chk__CheckInconsistAction", + "chk", + 13, + chk__check_inconsist_action__enum_values_by_number, + 13, + chk__check_inconsist_action__enum_values_by_name, + 1, + chk__check_inconsist_action__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCEnumValue chk__check_flag__enum_values_by_number[8] = +{ + { "CF_NONE", "CHK__CHECK_FLAG__CF_NONE", 0 }, + { "CF_DRYRUN", "CHK__CHECK_FLAG__CF_DRYRUN", 1 }, + { "CF_RESET", "CHK__CHECK_FLAG__CF_RESET", 2 }, + { "CF_FAILOUT", "CHK__CHECK_FLAG__CF_FAILOUT", 4 }, + { "CF_AUTO", "CHK__CHECK_FLAG__CF_AUTO", 8 }, + { "CF_ORPHAN_POOL", "CHK__CHECK_FLAG__CF_ORPHAN_POOL", 16 }, + { "CF_NO_FAILOUT", "CHK__CHECK_FLAG__CF_NO_FAILOUT", 32 }, + { "CF_NO_AUTO", "CHK__CHECK_FLAG__CF_NO_AUTO", 64 }, +}; +static const ProtobufCIntRange chk__check_flag__value_ranges[] = { +{0, 0},{4, 3},{8, 4},{16, 5},{32, 6},{64, 7},{0, 8} +}; +static const ProtobufCEnumValueIndex chk__check_flag__enum_values_by_name[8] = +{ + { "CF_AUTO", 4 }, + { "CF_DRYRUN", 1 }, + { "CF_FAILOUT", 3 }, + { "CF_NONE", 0 }, + { "CF_NO_AUTO", 7 }, + { "CF_NO_FAILOUT", 6 }, + { "CF_ORPHAN_POOL", 5 }, + { "CF_RESET", 2 }, +}; +const ProtobufCEnumDescriptor chk__check_flag__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckFlag", + "CheckFlag", + "Chk__CheckFlag", + "chk", + 8, + chk__check_flag__enum_values_by_number, + 8, + chk__check_flag__enum_values_by_name, + 6, + chk__check_flag__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCEnumValue 
chk__check_inst_status__enum_values_by_number[7] = +{ + { "CIS_INIT", "CHK__CHECK_INST_STATUS__CIS_INIT", 0 }, + { "CIS_RUNNING", "CHK__CHECK_INST_STATUS__CIS_RUNNING", 1 }, + { "CIS_COMPLETED", "CHK__CHECK_INST_STATUS__CIS_COMPLETED", 2 }, + { "CIS_STOPPED", "CHK__CHECK_INST_STATUS__CIS_STOPPED", 3 }, + { "CIS_FAILED", "CHK__CHECK_INST_STATUS__CIS_FAILED", 4 }, + { "CIS_PAUSED", "CHK__CHECK_INST_STATUS__CIS_PAUSED", 5 }, + { "CIS_IMPLICATED", "CHK__CHECK_INST_STATUS__CIS_IMPLICATED", 6 }, +}; +static const ProtobufCIntRange chk__check_inst_status__value_ranges[] = { +{0, 0},{0, 7} +}; +static const ProtobufCEnumValueIndex chk__check_inst_status__enum_values_by_name[7] = +{ + { "CIS_COMPLETED", 2 }, + { "CIS_FAILED", 4 }, + { "CIS_IMPLICATED", 6 }, + { "CIS_INIT", 0 }, + { "CIS_PAUSED", 5 }, + { "CIS_RUNNING", 1 }, + { "CIS_STOPPED", 3 }, +}; +const ProtobufCEnumDescriptor chk__check_inst_status__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckInstStatus", + "CheckInstStatus", + "Chk__CheckInstStatus", + "chk", + 7, + chk__check_inst_status__enum_values_by_number, + 7, + chk__check_inst_status__enum_values_by_name, + 1, + chk__check_inst_status__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCEnumValue chk__check_pool_status__enum_values_by_number[8] = +{ + { "CPS_UNCHECKED", "CHK__CHECK_POOL_STATUS__CPS_UNCHECKED", 0 }, + { "CPS_CHECKING", "CHK__CHECK_POOL_STATUS__CPS_CHECKING", 1 }, + { "CPS_CHECKED", "CHK__CHECK_POOL_STATUS__CPS_CHECKED", 2 }, + { "CPS_FAILED", "CHK__CHECK_POOL_STATUS__CPS_FAILED", 3 }, + { "CPS_PAUSED", "CHK__CHECK_POOL_STATUS__CPS_PAUSED", 4 }, + { "CPS_PENDING", "CHK__CHECK_POOL_STATUS__CPS_PENDING", 5 }, + { "CPS_STOPPED", "CHK__CHECK_POOL_STATUS__CPS_STOPPED", 6 }, + { "CPS_IMPLICATED", "CHK__CHECK_POOL_STATUS__CPS_IMPLICATED", 7 }, +}; +static const ProtobufCIntRange chk__check_pool_status__value_ranges[] = { +{0, 0},{0, 8} +}; +static const ProtobufCEnumValueIndex 
chk__check_pool_status__enum_values_by_name[8] = +{ + { "CPS_CHECKED", 2 }, + { "CPS_CHECKING", 1 }, + { "CPS_FAILED", 3 }, + { "CPS_IMPLICATED", 7 }, + { "CPS_PAUSED", 4 }, + { "CPS_PENDING", 5 }, + { "CPS_STOPPED", 6 }, + { "CPS_UNCHECKED", 0 }, +}; +const ProtobufCEnumDescriptor chk__check_pool_status__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckPoolStatus", + "CheckPoolStatus", + "Chk__CheckPoolStatus", + "chk", + 8, + chk__check_pool_status__enum_values_by_number, + 8, + chk__check_pool_status__enum_values_by_name, + 1, + chk__check_pool_status__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCEnumValue chk__check_scan_phase__enum_values_by_number[11] = +{ + { "CSP_PREPARE", "CHK__CHECK_SCAN_PHASE__CSP_PREPARE", 0 }, + { "CSP_POOL_LIST", "CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST", 1 }, + { "CSP_POOL_MBS", "CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS", 2 }, + { "CSP_POOL_CLEANUP", "CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP", 3 }, + { "CSP_CONT_LIST", "CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST", 4 }, + { "CSP_CONT_CLEANUP", "CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP", 5 }, + { "CSP_DTX_RESYNC", "CHK__CHECK_SCAN_PHASE__CSP_DTX_RESYNC", 6 }, + { "CSP_OBJ_SCRUB", "CHK__CHECK_SCAN_PHASE__CSP_OBJ_SCRUB", 7 }, + { "CSP_REBUILD", "CHK__CHECK_SCAN_PHASE__CSP_REBUILD", 8 }, + { "CSP_AGGREGATION", "CHK__CHECK_SCAN_PHASE__CSP_AGGREGATION", 9 }, + { "CSP_DONE", "CHK__CHECK_SCAN_PHASE__CSP_DONE", 10 }, +}; +static const ProtobufCIntRange chk__check_scan_phase__value_ranges[] = { +{0, 0},{0, 11} +}; +static const ProtobufCEnumValueIndex chk__check_scan_phase__enum_values_by_name[11] = +{ + { "CSP_AGGREGATION", 9 }, + { "CSP_CONT_CLEANUP", 5 }, + { "CSP_CONT_LIST", 4 }, + { "CSP_DONE", 10 }, + { "CSP_DTX_RESYNC", 6 }, + { "CSP_OBJ_SCRUB", 7 }, + { "CSP_POOL_CLEANUP", 3 }, + { "CSP_POOL_LIST", 1 }, + { "CSP_POOL_MBS", 2 }, + { "CSP_PREPARE", 0 }, + { "CSP_REBUILD", 8 }, +}; +const ProtobufCEnumDescriptor chk__check_scan_phase__descriptor = +{ 
+ PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "chk.CheckScanPhase", + "CheckScanPhase", + "Chk__CheckScanPhase", + "chk", + 11, + chk__check_scan_phase__enum_values_by_number, + 11, + chk__check_scan_phase__enum_values_by_name, + 1, + chk__check_scan_phase__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; diff --git a/src/chk/chk.pb-c.h b/src/chk/chk.pb-c.h new file mode 100644 index 00000000000..e8c73483678 --- /dev/null +++ b/src/chk/chk.pb-c.h @@ -0,0 +1,502 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! */ +/* Generated from: chk.proto */ + +#ifndef PROTOBUF_C_chk_2eproto__INCLUDED +#define PROTOBUF_C_chk_2eproto__INCLUDED + +#include + +PROTOBUF_C__BEGIN_DECLS + +#if PROTOBUF_C_VERSION_NUMBER < 1003000 +# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. +#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION +# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. +#endif + + +typedef struct _Chk__CheckReport Chk__CheckReport; + + +/* --- enums --- */ + +/* + * Kinds of DAOS global inconsistency. + */ +typedef enum _Chk__CheckInconsistClass { + /* + * Consistent cases. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_NONE = 0, + /* + * Only a subset of the pool services are present but we will have a quorum. + * Default action: CIA_IGNORE. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_LESS_SVC_WITH_QUORUM = 1, + /* + * Only a subset of the pool services are present, and we don't have a quorum. + * Default action: CIA_INTERACT. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_LESS_SVC_WITHOUT_QUORUM = 2, + /* + * More members are reported than the pool service was created with. + * Default action: CIA_DISCARD. Remove unrecognized pool service. 
+ */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_MORE_SVC = 3, + /* + * Engine(s) claim the pool which is not registered to MS. + * Default action: CIA_READD. Register the pool to the MS. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_MS = 4, + /* + * Pool is registered to MS but not claimed by any engine. + * Default action: CIA_DISCARD. De-register pool from MS. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_ENGINE = 5, + /* + * Svcl list stored in MS does not match the actual PS membership. + * Default action: CIA_TRUST_PS. Refresh svcl list in MS DB. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_SVCL = 6, + /* + * The pool label recorded by MS does not match the pool label property from PS. + * Default action: CIA_TRUST_PS. Refresh label in MS DB. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_LABEL = 7, + /* + * An engine has some allocated storage but does not appear in pool map. + * Default action: CIA_DISCARD. Associated files and blobs will be deleted from the engine. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_NONEXIST_IN_MAP = 8, + /* + * An engine has some allocated storage and is marked as down/downout in pool map. + * Default action: CIA_IGNORE. It can be reintegrated after CR scan. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_DOWN_IN_MAP = 9, + /* + * An engine is referenced in pool map, but no storage is actually allocated on this engine. + * Default action: CIA_DISCARD. Evict the rank from pool map, give left things to rebuild. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_HAS_NO_STORAGE = 10, + /* + * Containers that have storage allocated on engine but do not exist in the PS. + * Default action: CIA_DISCARD. Destroy the unrecognized container. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_CONT_NONEXIST_ON_PS = 11, + /* + * The container label recorded by PS does not match the container label property. + * Default action: CIA_TRUST_PS. Refresh label property on related target(s). 
+ */ + CHK__CHECK_INCONSIST_CLASS__CIC_CONT_BAD_LABEL = 12, + /* + * The DTX is corrupted, some participant RDG(s) may be lost. + * Default action: CIA_INTERACT. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_DTX_CORRUPTED = 13, + /* + * The DTX entry on leader does not exist, so its status is uncertain. + * Default action: CIA_DISCARD. It is equivalent to aborting the DTX and may lose data on related + * shard, then we may find data inconsistency in a subsequent CR scan phase, at which time + * such data inconsistency will be fixed. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_DTX_ORPHAN = 14, + /* + * The checksum information is lost. + * Default action: CIA_READD. We have to trust the data and recalculate the checksum. If + * data is corrupted, then we may hit data inconsistency in a subsequent CR scan phase, at + * which time such data inconsistency will be fixed. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_CSUM_LOST = 15, + /* + * Checksum related inconsistency or data corruption. + * Default action: CIA_DISCARD. Then we will hit data loss in a subsequent CR scan phase, + * at which time such data inconsistency will be fixed. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_CSUM_FAILURE = 16, + /* + * Replicated object lost some replica(s). + * Default action: CIA_READD. Copy from another valid replica. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_REP = 17, + /* + * EC object lost parity or data shard(s). + * Default action: CIA_READD. Trust other available shards and recalculate the lost one(s). + */ + CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_EC_SHARD = 18, + /* + * EC object lost too many shards that exceeds its redundancy. + * Default action: CIA_INTERACT. Ask the admin to decide whether to keep or remove the object. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_LOST_EC_DATA = 19, + /* + * Data inconsistency among replicas + * Default action: CIA_TRUST_LATEST. Try to keep the latest data. If all have the same epoch, + * then ask the admin (CIA_INTERACT) to decide which one will be trusted. 
+ */ + CHK__CHECK_INCONSIST_CLASS__CIC_OBJ_DATA_INCONSIST = 20, + /* + * Unknown inconsistency. + * Default action: CIA_IGNORE. + */ + CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN = 100 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_INCONSIST_CLASS) +} Chk__CheckInconsistClass; +/* + * Actions for how to handle kinds of inconsistency. + */ +typedef enum _Chk__CheckInconsistAction { + /* + * Default action, depends on the detailed inconsistency class. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT = 0, + /* + * Interact with administrator for further action. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT = 1, + /* + * Ignore but log the inconsistency. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE = 2, + /* + * Discard the unrecognized element: pool service, pool itself, container, and so on. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD = 3, + /* + * Re-add the missing element: pool to MS, target to pool map, and so on. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_READD = 4, + /* + * Trust the information recorded in MS DB. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS = 5, + /* + * Trust the information recorded in PS DB. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS = 6, + /* + * Trust the information recorded by target(s). + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET = 7, + /* + * Trust the majority parts (if have). + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MAJORITY = 8, + /* + * Trust the one with latest (pool map or epoch) information. Keep the latest data. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_LATEST = 9, + /* + * Trust the one with oldest (pool map or epoch) information. Rollback to old version. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_OLDEST = 10, + /* + * Trust EC parity shard. + */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_PARITY = 11, + /* + * Trust EC data shard. 
+ */ + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA = 12 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_INCONSIST_ACTION) +} Chk__CheckInconsistAction; +/* + * The flags to control DAOS check general behavior, not related with any detailed inconsistency. + */ +typedef enum _Chk__CheckFlag { + CHK__CHECK_FLAG__CF_NONE = 0, + /* + * Only scan without real repairing inconsistency. + */ + CHK__CHECK_FLAG__CF_DRYRUN = 1, + /* + * Start DAOS check from the beginning. + * Otherwise, resume the DAOS check from the latest checkpoint by default. + */ + CHK__CHECK_FLAG__CF_RESET = 2, + /* + * Stop DAOS check if hit unknown inconsistency or fail to repair some inconsistency. + * Otherwise, mark 'fail' on related component and continue to handle next one by default. + */ + CHK__CHECK_FLAG__CF_FAILOUT = 4, + /* + * If the admin does not want to interact with engine during check scan, then CIA_INTERACT + * will be converted to CIA_IGNORE. That will overwrite the CheckInconsistPolicy. + */ + CHK__CHECK_FLAG__CF_AUTO = 8, + /* + * Handle orphan pool when start the check instance. If not specify the flag, some orphan + * pool(s) may be not handled (by default) unless all pools are checked from the scratch. + */ + CHK__CHECK_FLAG__CF_ORPHAN_POOL = 16, + /* + * Overwrite former set CF_FAILOUT flag, cannot be specified together with CF_FAILOUT. + */ + CHK__CHECK_FLAG__CF_NO_FAILOUT = 32, + /* + * Overwrite former set CF_AUTO flag, cannot be specified together with CF_AUTO. + */ + CHK__CHECK_FLAG__CF_NO_AUTO = 64 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_FLAG) +} Chk__CheckFlag; +/* + * The status of DAOS check instance. + */ +typedef enum _Chk__CheckInstStatus { + /* + * DAOS check has never been run. + */ + CHK__CHECK_INST_STATUS__CIS_INIT = 0, + /* + * DAOS check is still in process. + */ + CHK__CHECK_INST_STATUS__CIS_RUNNING = 1, + /* + * All passes have been done for all required pools. 
+ */ + CHK__CHECK_INST_STATUS__CIS_COMPLETED = 2, + /* + * DAOS check has been explicitly stopped, do not allow to rejoin. + */ + CHK__CHECK_INST_STATUS__CIS_STOPPED = 3, + /* + * DAOS check auto stopped for some unrecoverable failure, do not rejoin. + */ + CHK__CHECK_INST_STATUS__CIS_FAILED = 4, + /* + * DAOS check has been paused because engine exit, allow to rejoin. + */ + CHK__CHECK_INST_STATUS__CIS_PAUSED = 5, + /* + * Check on the engine exit for other engine failure, do not rejoin. + */ + CHK__CHECK_INST_STATUS__CIS_IMPLICATED = 6 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_INST_STATUS) +} Chk__CheckInstStatus; +/* + * The pool status for DAOS check. + */ +typedef enum _Chk__CheckPoolStatus { + /* + * DAOS check has not started against this pool. + */ + CHK__CHECK_POOL_STATUS__CPS_UNCHECKED = 0, + /* + * The pool is being checked. + */ + CHK__CHECK_POOL_STATUS__CPS_CHECKING = 1, + /* + * DAOS check has successfully completed all the passes on this pool. + */ + CHK__CHECK_POOL_STATUS__CPS_CHECKED = 2, + /* + * DAOS check could not be completed due to some unrecoverable failure. + */ + CHK__CHECK_POOL_STATUS__CPS_FAILED = 3, + /* + * Checking the pool has been paused because engine exit. + */ + CHK__CHECK_POOL_STATUS__CPS_PAUSED = 4, + /* + * Waiting for the decision from the admin. + */ + CHK__CHECK_POOL_STATUS__CPS_PENDING = 5, + /* + * DAOS check on the pool has been stopped explicitly. + */ + CHK__CHECK_POOL_STATUS__CPS_STOPPED = 6, + /* + * Check on the pool is stopped because of other pool or engine failure. + */ + CHK__CHECK_POOL_STATUS__CPS_IMPLICATED = 7 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_POOL_STATUS) +} Chk__CheckPoolStatus; +/* + * DAOS check engine scan phases. + */ +typedef enum _Chk__CheckScanPhase { + /* + * Initial phase, prepare to start check on related engines. + */ + CHK__CHECK_SCAN_PHASE__CSP_PREPARE = 0, + /* + * Pool list consolidation. + */ + CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST = 1, + /* + * Pool membership. 
+ */ + CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS = 2, + /* + * Pool cleanup. + */ + CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP = 3, + /* + * Container list consolidation. + */ + CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST = 4, + /* + * Container cleanup. + */ + CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP = 5, + /* + * DTX resync and cleanup. + */ + CHK__CHECK_SCAN_PHASE__CSP_DTX_RESYNC = 6, + /* + * RP/EC shards consistency verification with checksum scrub if have. + */ + CHK__CHECK_SCAN_PHASE__CSP_OBJ_SCRUB = 7, + /* + * Object rebuild. + */ + CHK__CHECK_SCAN_PHASE__CSP_REBUILD = 8, + /* + * EC aggregation & VOS aggregation. + */ + CHK__CHECK_SCAN_PHASE__CSP_AGGREGATION = 9, + /* + * All done. + */ + CHK__CHECK_SCAN_PHASE__CSP_DONE = 10 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_SCAN_PHASE) +} Chk__CheckScanPhase; + +/* --- messages --- */ + +/* + * DAOS check engine reports the found inconsistency and repair result to control plane. + * If the repair action is CIA_INTERACT, then the control plane will reply current dRPC + * firstly, and then interact with the admin for the repair decision in another section + * and tell DAOS check engine via another DRPC_METHOD_MGMT_CHK_ACT dRPC call. + * If the CheckReport::msg is not enough to help admin to make the decision, then we + * may have to leverage DAOS debug tools to dump more information from related target. + */ +struct _Chk__CheckReport +{ + ProtobufCMessage base; + /* + * DAOS Check event sequence, unique for the instance. + */ + uint64_t seq; + /* + * Inconsistency class + */ + Chk__CheckInconsistClass class_; + /* + * The action taken to repair the inconsistency + */ + Chk__CheckInconsistAction action; + /* + * Repair result: zero is for repaired successfully. + * negative value if failed to repair. + * positive value is for CIA_IGNORE or dryrun mode. + * It is meaningless if the action is CIA_INTERACT. + */ + int32_t result; + /* + * Inconsistency happened on which rank if applicable. 
+ */ + uint32_t rank; + /* + * Inconsistency happened on which target in the rank if applicable. + */ + uint32_t target; + /* + * The consistency is in which pool if applicable. + */ + char *pool_uuid; + /* + * The pool label, if available. + */ + char *pool_label; + /* + * The consistency is in which container if applicable. + */ + char *cont_uuid; + /* + * The container label, if available. + */ + char *cont_label; + /* + * The consistency is in which object if applicable. + */ + char *objid; + /* + * The consistency is in which dkey if applicable. + */ + char *dkey; + /* + * The consistency is in which akey if applicable. + */ + char *akey; + /* + * The time of report (and repair) the inconsistency. + */ + char *timestamp; + /* + * Information to describe the inconsistency in detail. + */ + char *msg; + /* + * Interactive mode options (first is suggested). + */ + size_t n_act_choices; + Chk__CheckInconsistAction *act_choices; + /* + * Details for each potential action (length should match actions). + */ + size_t n_act_details; + char **act_details; + /* + * Formatted messages containing details for each action choice. 
+ */ + size_t n_act_msgs; + char **act_msgs; +}; +#define CHK__CHECK_REPORT__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&chk__check_report__descriptor) \ + , 0, CHK__CHECK_INCONSIST_CLASS__CIC_NONE, CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT, 0, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL } + + +/* Chk__CheckReport methods */ +void chk__check_report__init + (Chk__CheckReport *message); +size_t chk__check_report__get_packed_size + (const Chk__CheckReport *message); +size_t chk__check_report__pack + (const Chk__CheckReport *message, + uint8_t *out); +size_t chk__check_report__pack_to_buffer + (const Chk__CheckReport *message, + ProtobufCBuffer *buffer); +Chk__CheckReport * + chk__check_report__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void chk__check_report__free_unpacked + (Chk__CheckReport *message, + ProtobufCAllocator *allocator); +/* --- per-message closures --- */ + +typedef void (*Chk__CheckReport_Closure) + (const Chk__CheckReport *message, + void *closure_data); + +/* --- services --- */ + + +/* --- descriptors --- */ + +extern const ProtobufCEnumDescriptor chk__check_inconsist_class__descriptor; +extern const ProtobufCEnumDescriptor chk__check_inconsist_action__descriptor; +extern const ProtobufCEnumDescriptor chk__check_flag__descriptor; +extern const ProtobufCEnumDescriptor chk__check_inst_status__descriptor; +extern const ProtobufCEnumDescriptor chk__check_pool_status__descriptor; +extern const ProtobufCEnumDescriptor chk__check_scan_phase__descriptor; +extern const ProtobufCMessageDescriptor chk__check_report__descriptor; + +PROTOBUF_C__END_DECLS + + +#endif /* PROTOBUF_C_chk_2eproto__INCLUDED */ diff --git a/src/chk/chk_common.c 
b/src/chk/chk_common.c new file mode 100644 index 00000000000..fda4efc9973 --- /dev/null +++ b/src/chk/chk_common.c @@ -0,0 +1,1288 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include +#include +#include +#include +#include + +#include "chk.pb-c.h" +#include "chk_internal.h" + +struct chk_pool_bundle { + d_list_t *cpb_head; + uuid_t cpb_uuid; + uint32_t *cpb_shard_nr; + d_rank_t cpb_rank; + struct chk_instance *cpb_ins; + /* Pointer to the pool bookmark. */ + struct chk_bookmark *cpb_bk; + void *cpb_data; + chk_pool_free_data_t cpb_free_cb; +}; + +static int +chk_pool_hkey_size(void) +{ + return sizeof(uuid_t); +} + +static void +chk_pool_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey) +{ + D_ASSERT(key_iov->iov_len == sizeof(uuid_t)); + + memcpy(hkey, key_iov->iov_buf, key_iov->iov_len); +} + +static int +chk_pool_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, + struct btr_record *rec, d_iov_t *val_out) +{ + struct chk_pool_bundle *cpb = val_iov->iov_buf; + struct chk_pool_rec *cpr = NULL; + struct chk_pool_shard *cps = NULL; + int rc = 0; + + D_ASSERT(cpb != NULL); + + D_ALLOC_PTR(cpr); + if (cpr == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + if (cpb->cpb_data != NULL) { + D_ALLOC_PTR(cps); + if (cps == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = ABT_mutex_create(&cpr->cpr_mutex); + if (rc != 0) + D_GOTO(out, rc = dss_abterr2der(rc)); + + rc = ABT_cond_create(&cpr->cpr_cond); + if (rc != 0) + D_GOTO(out, rc = dss_abterr2der(rc)); + + D_INIT_LIST_HEAD(&cpr->cpr_shutdown_link); + D_INIT_LIST_HEAD(&cpr->cpr_shard_list); + D_INIT_LIST_HEAD(&cpr->cpr_pending_list); + cpr->cpr_refs = 1; + uuid_copy(cpr->cpr_uuid, cpb->cpb_uuid); + cpr->cpr_thread = ABT_THREAD_NULL; + if (cpb->cpb_bk != NULL) + memcpy(&cpr->cpr_bk, cpb->cpb_bk, sizeof(cpr->cpr_bk)); + cpr->cpr_ins = cpb->cpb_ins; + + rec->rec_off = 
umem_ptr2off(&tins->ti_umm, cpr); + d_list_add_tail(&cpr->cpr_link, cpb->cpb_head); + + if (cps != NULL) { + cps->cps_rank = cpb->cpb_rank; + cps->cps_data = cpb->cpb_data; + cps->cps_free_cb = cpb->cpb_free_cb; + + d_list_add_tail(&cps->cps_link, &cpr->cpr_shard_list); + cpr->cpr_shard_nr++; + if (cpb->cpb_shard_nr != NULL) + (*cpb->cpb_shard_nr)++; + } + + d_iov_set(val_out, cpr, sizeof(*cpr)); + +out: + if (rc != 0 && cpr != NULL) { + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + D_FREE(cps); + D_FREE(cpr); + } + + return rc; +} + +/* + * Tree callback that detaches a pool record from its tree slot. When the caller passes + * an iov through args the record is handed back in it; otherwise the reference is + * dropped via chk_pool_put(). Always returns 0. + */ +static int +chk_pool_free(struct btr_instance *tins, struct btr_record *rec, void *args) +{ + struct chk_pool_rec *cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_t *val_iov = args; + + rec->rec_off = UMOFF_NULL; + if (val_iov != NULL) /* caller asked to receive the record instead of releasing it */ + d_iov_set(val_iov, cpr, sizeof(*cpr)); + else + chk_pool_put(cpr); + + return 0; +} + +static int +chk_pool_fetch(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key_iov, d_iov_t *val_iov) +{ + struct chk_pool_rec *cpr; + + D_ASSERT(val_iov != NULL); + + cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_set(val_iov, cpr, sizeof(*cpr)); + + return 0; +} + +static int +chk_pool_update(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key, d_iov_t *val, d_iov_t *val_out) +{ + struct chk_pool_bundle *cpb = val->iov_buf; + struct chk_pool_rec *cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + struct chk_pool_shard *cps; + int rc = 0; + + D_ASSERT(cpb != NULL); + D_ASSERT(cpb->cpb_data != NULL); + + D_ALLOC_PTR(cps); + if (cps == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + cps->cps_rank = cpb->cpb_rank; + cps->cps_data = cpb->cpb_data; + cps->cps_free_cb = cpb->cpb_free_cb; + + d_list_add_tail(&cps->cps_link, &cpr->cpr_shard_list); + cpr->cpr_shard_nr++; + if (cpb->cpb_shard_nr != NULL) + (*cpb->cpb_shard_nr)++; + + d_iov_set(val_out, cpr, sizeof(*cpr)); + +out: + return 
rc; +} + +btr_ops_t chk_pool_ops = { + .to_hkey_size = chk_pool_hkey_size, + .to_hkey_gen = chk_pool_hkey_gen, + .to_rec_alloc = chk_pool_alloc, + .to_rec_free = chk_pool_free, + .to_rec_fetch = chk_pool_fetch, + .to_rec_update = chk_pool_update, +}; + +struct chk_pending_bundle { + d_list_t *cpb_pool_head; + d_list_t *cpb_rank_head; + d_rank_t cpb_rank; + uuid_t cpb_uuid; + uint32_t cpb_class; + uint64_t cpb_seq; +}; + +static int +chk_pending_hkey_size(void) +{ + return sizeof(uint64_t); +} + +static void +chk_pending_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey) +{ + D_ASSERT(key_iov->iov_len == sizeof(uint64_t)); + + memcpy(hkey, key_iov->iov_buf, key_iov->iov_len); +} + +static int +chk_pending_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, + struct btr_record *rec, d_iov_t *val_out) +{ + struct chk_pending_bundle *cpb = val_iov->iov_buf; + struct chk_pending_rec *cpr = NULL; + int rc = 0; + + D_ASSERT(cpb != NULL); + D_ASSERT(val_out != NULL); + + D_ALLOC_PTR(cpr); + if (cpr == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ABT_mutex_create(&cpr->cpr_mutex); + if (rc != 0) + D_GOTO(out, rc = dss_abterr2der(rc)); + + rc = ABT_cond_create(&cpr->cpr_cond); + if (rc != 0) + D_GOTO(out, rc = dss_abterr2der(rc)); + + uuid_copy(cpr->cpr_uuid, cpb->cpb_uuid); + cpr->cpr_seq = cpb->cpb_seq; + cpr->cpr_rank = cpb->cpb_rank; + cpr->cpr_class = cpb->cpb_class; + cpr->cpr_action = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + if (cpb->cpb_rank_head != NULL) + d_list_add_tail(&cpr->cpr_rank_link, cpb->cpb_rank_head); + else + D_INIT_LIST_HEAD(&cpr->cpr_rank_link); + + rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr); + d_list_add_tail(&cpr->cpr_pool_link, cpb->cpb_pool_head); + + d_iov_set(val_out, cpr, sizeof(*cpr)); + +out: + if (rc != 0) { + if (cpr != NULL) { + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + D_FREE(cpr); + } + } + 
+ return rc; +} + +static int +chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args) +{ + struct chk_pending_rec *cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_t *val_iov = args; + + rec->rec_off = UMOFF_NULL; + d_list_del_init(&cpr->cpr_pool_link); + d_list_del_init(&cpr->cpr_rank_link); + + if (val_iov != NULL) { + d_iov_set(val_iov, cpr, sizeof(*cpr)); + } else { + ABT_mutex_lock(cpr->cpr_mutex); + if (cpr->cpr_busy) { + cpr->cpr_exiting = 1; + ABT_cond_broadcast(cpr->cpr_cond); + ABT_mutex_unlock(cpr->cpr_mutex); + } else { + ABT_mutex_unlock(cpr->cpr_mutex); + chk_pending_destroy(cpr); + } + } + + return 0; +} + +static int +chk_pending_fetch(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key_iov, d_iov_t *val_iov) +{ + struct chk_pending_rec *cpr; + + D_ASSERT(val_iov != NULL); + + cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_set(val_iov, cpr, sizeof(*cpr)); + + return 0; +} + +static int +chk_pending_update(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key, d_iov_t *val, d_iov_t *val_out) +{ + struct chk_pending_rec *cpr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + + D_WARN("The interaction for pool "DF_UUIDF" with inconsistency %u hit sequence conflict " + DF_X64", need retry\n", DP_UUID(cpr->cpr_uuid), cpr->cpr_class, cpr->cpr_seq); + + return -DER_AGAIN; +} + +btr_ops_t chk_pending_ops = { + .to_hkey_size = chk_pending_hkey_size, + .to_hkey_gen = chk_pending_hkey_gen, + .to_rec_alloc = chk_pending_alloc, + .to_rec_free = chk_pending_free, + .to_rec_fetch = chk_pending_fetch, + .to_rec_update = chk_pending_update, +}; + +void +chk_ranks_dump(uint32_t rank_nr, d_rank_t *ranks) +{ + char buf[80]; + char *ptr = buf; + int rc; + int i; + + if (unlikely(rank_nr == 0)) + return; + + D_INFO("Ranks List:\n"); + + while (rank_nr >= 8) { + D_INFO("%8u %8u %8u %8u %8u %8u %8u %8u\n", + ranks[0], ranks[1], ranks[2], ranks[3], + ranks[4], ranks[5], ranks[6], ranks[7]); + rank_nr -= 8; + 
ranks += 8; + } + + if (rank_nr > 0) { + /* Fewer than 8 ranks remain; format them into buf, bounding every write by the space actually left. */ + rc = snprintf(ptr, sizeof(buf), "%8u", ranks[0]); + D_ASSERT(rc > 0); + ptr += rc; + + for (i = 1; i < rank_nr; i++) { + rc = snprintf(ptr, sizeof(buf) - (ptr - buf), " %8u", ranks[i]); /* limit follows the real write position */ + D_ASSERT(rc > 0); + ptr += rc; + } + + D_INFO("%s\n", buf); + } +} + +void +chk_pools_dump(d_list_t *head, int pool_nr, uuid_t pools[]) +{ + struct chk_pool_rec *cpr; + int i = 0; + + if (head != NULL && !d_list_empty(head)) { + D_INFO("Pools List:\n"); + d_list_for_each_entry(cpr, head, cpr_link) { + if (cpr->cpr_for_orphan) + D_INFO(DF_UUIDF" (for orphan/dangling)\n", DP_UUID(cpr->cpr_uuid)); + else + D_INFO(DF_UUIDF"\n", DP_UUID(cpr->cpr_uuid)); + } + } else if (pool_nr > 0) { + D_INFO("Pools List:\n"); + do { + D_INFO(DF_UUIDF"\n", DP_UUID(pools[i++])); + } while (i < pool_nr); + } else { + D_INFO("Pools List: all\n"); + } +} + +void +chk_pool_remove_nowait(struct chk_pool_rec *cpr) +{ + d_iov_t kiov; + int rc; + + cpr->cpr_skip = 1; + d_iov_set(&kiov, cpr->cpr_uuid, sizeof(uuid_t)); + rc = dbtree_delete(cpr->cpr_ins->ci_pool_hdl, BTR_PROBE_EQ, &kiov, NULL); + if (rc != 0 && rc != -DER_NONEXIST && rc != -DER_NO_HDL) + D_WARN("Failed to delete pool record: "DF_RC"\n", DP_RC(rc)); +} + +void +chk_pool_start_svc(struct chk_pool_rec *cpr, int *ret) +{ + int rc = 0; + + ABT_mutex_lock(cpr->cpr_mutex); + + if (!cpr->cpr_started) { + rc = ds_pool_start_with_svc(cpr->cpr_uuid); + if (rc == 0) + cpr->cpr_started = 1; + else + D_WARN("Cannot start (1) the pool for "DF_UUIDF" after check: "DF_RC"\n", + DP_UUID(cpr->cpr_uuid), DP_RC(rc)); + } + + if (cpr->cpr_started && !cpr->cpr_start_post) { + rc = ds_pool_chk_post(cpr->cpr_uuid); + if (rc != 0) { + D_WARN("Cannot post handle (1) pool start for " + DF_UUIDF" after check: "DF_RC"\n", + DP_UUID(cpr->cpr_uuid), DP_RC(rc)); + /* Failed to post handle pool start, have to stop it. 
*/ + chk_pool_shutdown(cpr, true); + } else { + cpr->cpr_start_post = 1; + } + } + + ABT_mutex_unlock(cpr->cpr_mutex); + + if (ret != NULL) + *ret = rc; +} + +static void +chk_pool_wait(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_pending_rec *pending; + struct chk_pending_rec *tmp; + + D_ASSERT(cpr->cpr_refs > 0); + /* + * The caller chk_pool_stop_one() must firstly delete the cpr from the pool + * tree, then stop it here. So chk_pool_wait() will not be called repeatedly. + */ + D_ASSERT(cpr->cpr_stop == 0); + + ABT_mutex_lock(cpr->cpr_mutex); + if (cpr->cpr_thread != ABT_THREAD_NULL) { + cpr->cpr_stop = 1; + ABT_cond_broadcast(cpr->cpr_cond); + ABT_mutex_unlock(cpr->cpr_mutex); + + /* Cleanup all pending records belong to this pool. */ + ABT_rwlock_wrlock(ins->ci_abt_lock); + d_list_for_each_entry_safe(pending, tmp, &cpr->cpr_pending_list, cpr_pool_link) + chk_pending_wakeup(ins, pending); + ABT_rwlock_unlock(ins->ci_abt_lock); + + /* Wait for related pool ULT to exit. */ + ABT_thread_free(&cpr->cpr_thread); + } else { + ABT_mutex_unlock(cpr->cpr_mutex); + } +} + +void +chk_pool_stop_one(struct chk_instance *ins, uuid_t uuid, int status, uint32_t phase, int *ret) +{ + struct chk_bookmark *cbk; + struct chk_pool_rec *cpr; + d_iov_t kiov; + d_iov_t riov; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc = 0; + + /* + * Remove the pool record from the tree firstly, that will cause related scan ULT + * for such pool to exit, and then can update the pool's bookmark without race. + */ + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, uuid, sizeof(uuid_t)); + rc = dbtree_delete(ins->ci_pool_hdl, BTR_PROBE_EQ, &kiov, &riov); + if (rc != 0) { + if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) + rc = 0; + else + D_ERROR("%s on rank %u failed to delete pool record " + DF_UUIDF" with status %u, phase %u: "DF_RC"\n", + ins->ci_is_leader ? 
"leader" : "engine", dss_self_rank(), + DP_UUID(uuid), status, phase, DP_RC(rc)); + } else { + cpr = (struct chk_pool_rec *)riov.iov_buf; + cbk = &cpr->cpr_bk; + + chk_pool_wait(cpr); + + if ((cbk->cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_CHECKING || + cbk->cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) && + !DAOS_FAIL_CHECK(DAOS_CHK_ENGINE_DEATH)) { + if (phase != CHK_INVAL_PHASE && phase > cbk->cb_phase) + cbk->cb_phase = phase; + cbk->cb_pool_status = status; + if (status == CHK__CHECK_POOL_STATUS__CPS_STOPPED) + ins->ci_pool_stopped = 1; + cbk->cb_time.ct_stop_time = time(NULL); + uuid_unparse_lower(uuid, uuid_str); + rc = chk_bk_update_pool(cbk, uuid_str); + } + + if (!ins->ci_is_leader && + (cpr->cpr_bk.cb_pool_status != CHK__CHECK_POOL_STATUS__CPS_CHECKED || + cpr->cpr_not_export_ps || DAOS_FAIL_CHECK(DAOS_CHK_ENGINE_DEATH))) + chk_pool_shutdown(cpr, false); + + /* Drop the reference that is held when create in chk_pool_alloc(). */ + chk_pool_put(cpr); + } + + if (ret != NULL) + *ret = rc; +} + +void +chk_pool_stop_all(struct chk_instance *ins, uint32_t status, int *ret) +{ + struct chk_pool_rec *cpr; + struct chk_pool_rec *tmp; + + /* + * Hold reference on each before stop one to guarantee that the next + * 'tmp' will not be unlinked from the list during stop current cpr. 
+ */ + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) + chk_pool_get(cpr); + + d_list_for_each_entry_safe(cpr, tmp, &ins->ci_pool_list, cpr_link) { + if (ret == NULL || *ret == 0) + chk_pool_stop_one(ins, cpr->cpr_uuid, status, CHK_INVAL_PHASE, ret); + chk_pool_put(cpr); + } +} + +int +chk_pools_pause_cb(struct sys_db *db, char *table, d_iov_t *key, void *args) +{ + struct chk_traverse_pools_args *ctpa = args; + char *uuid_str = key->iov_buf; + struct chk_bookmark cbk; + int rc = 0; + + if (!daos_is_valid_uuid_string(uuid_str)) + D_GOTO(out, rc = 0); + + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc != 0) + goto out; + + if (cbk.cb_magic != CHK_BK_MAGIC_POOL || cbk.cb_gen != ctpa->ctpa_gen) + goto out; + + if (cbk.cb_pool_status != CHK__CHECK_POOL_STATUS__CPS_CHECKING && + cbk.cb_pool_status != CHK__CHECK_POOL_STATUS__CPS_PENDING) + goto out; + + cbk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PAUSED; + cbk.cb_time.ct_stop_time = time(NULL); + rc = chk_bk_update_pool(&cbk, uuid_str); + +out: + return rc == -DER_NONEXIST ? 0 : rc; +} + +int +chk_pools_cleanup_cb(struct sys_db *db, char *table, d_iov_t *key, void *args) +{ + char *uuid_str = key->iov_buf; + struct chk_bookmark cbk; + int rc = 0; + + if (!daos_is_valid_uuid_string(uuid_str)) + D_GOTO(out, rc = 0); + + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc == 0) + rc = chk_bk_delete_pool(uuid_str); + +out: + return rc == -DER_NONEXIST ? 
0 : rc; +} + +int +chk_pool_start_one(struct chk_instance *ins, uuid_t uuid, uint64_t gen) +{ + struct chk_bookmark cbk = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc; + + uuid_unparse_lower(uuid, uuid_str); + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc != 0 && rc != -DER_NONEXIST) + goto out; + + if (cbk.cb_magic != CHK_BK_MAGIC_POOL) { + memset(&cbk, 0, sizeof(cbk)); + cbk.cb_magic = CHK_BK_MAGIC_POOL; + cbk.cb_version = DAOS_CHK_VERSION; + cbk.cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + } + + cbk.cb_gen = gen; + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, + dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL); + +out: + return rc; +} + +int +chk_pools_load_list(struct chk_instance *ins, uint64_t gen, uint32_t flags, + int pool_nr, uuid_t pools[], uint32_t *phase) +{ + struct chk_bookmark cbk; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_rank_t myrank = dss_self_rank(); + int i; + int rc = 0; + + for (i = 0; i < pool_nr; i++) { + if (!ins->ci_is_leader) { + rc = ds_mgmt_pool_exist(pools[i]); + /* "rc == 0" means non-exist, skip it. */ + if (rc == 0) + continue; + if (rc < 0) + break; + } + + uuid_unparse_lower(pools[i], uuid_str); + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc != 0 && rc != -DER_NONEXIST) + break; + + if (rc == -DER_NONEXIST || flags & CHK__CHECK_FLAG__CF_RESET) { + memset(&cbk, 0, sizeof(cbk)); + cbk.cb_magic = CHK_BK_MAGIC_POOL; + cbk.cb_version = DAOS_CHK_VERSION; + cbk.cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + cbk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_UNCHECKED; + } + + /* + * For dryrun mode, restart from the scratch since we did not trace and repair + * former inconsistency. 
+ */ + if (rc == 0 && ins->ci_start_flags & CSF_RESET_NONCOMP && + cbk.cb_phase != CHK__CHECK_SCAN_PHASE__CSP_DONE) { + memset(&cbk, 0, sizeof(cbk)); + cbk.cb_magic = CHK_BK_MAGIC_POOL; + cbk.cb_version = DAOS_CHK_VERSION; + cbk.cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + cbk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_UNCHECKED; + } + + /* + * NOTE: For check leader, if the specified pool has been checked, then do not load + * it for current check instance. But for check engine, if the pool is in the + * check list, then load it even if its former check has completed, otherwise, + * it may be handled as dangling pool. + */ + + if (rc == 0 && cbk.cb_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE && ins->ci_is_leader) + continue; + + /* + * Here, we only update the pool bookmark in DRAM, the caller will store the update + * persistently sometime later. + */ + cbk.cb_gen = gen; + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, pools[i], + myrank, &cbk, ins, NULL, NULL, NULL, NULL); + if (rc != 0) + break; + + if (phase != NULL && cbk.cb_phase < *phase) + *phase = cbk.cb_phase; + } + + return rc; +} + +int +chk_pools_load_from_db(struct sys_db *db, char *table, d_iov_t *key, void *args) +{ + struct chk_traverse_pools_args *ctpa = args; + struct chk_instance *ins = ctpa->ctpa_ins; + char *uuid_str = key->iov_buf; + uuid_t uuid; + struct chk_bookmark cbk; + int rc = 0; + + if (!daos_is_valid_uuid_string(uuid_str)) + D_GOTO(out, rc = 0); + + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc != 0) + goto out; + + if (cbk.cb_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE) + goto out; + + uuid_parse(uuid_str, uuid); + + if (!ins->ci_is_leader) { + rc = ds_mgmt_pool_exist(uuid); + /* "rc == 0" means non-exist, skip it. */ + if (rc <= 0) + goto out; + } + + /* + * For dryrun mode, restart from the scratch since we did not trace and repair + * former inconsistency. 
+ */ + if (ins->ci_start_flags & CSF_RESET_NONCOMP) { + memset(&cbk, 0, sizeof(cbk)); + cbk.cb_magic = CHK_BK_MAGIC_POOL; + cbk.cb_version = DAOS_CHK_VERSION; + cbk.cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + cbk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_UNCHECKED; + } + + /* + * Here, we only update the pool bookmark in DRAM, the caller will store the update + * persistently sometime later. + */ + cbk.cb_gen = ctpa->ctpa_gen; + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, + dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL); + if (rc == 0 && ctpa->ctpa_phase > cbk.cb_phase) + ctpa->ctpa_phase = cbk.cb_phase; + +out: + return rc; +} + +int +chk_pools_update_bk(struct chk_instance *ins, uint32_t phase) +{ + struct chk_bookmark *cbk; + struct chk_pool_rec *cpr; + struct chk_pool_rec *tmp; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc = 0; + int rc1; + + /* + * Hold reference on each before update to guarantee that the next 'tmp' + * will not be unlinked from the list during current 'cpr' update. 
+ */ + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) + chk_pool_get(cpr); + + d_list_for_each_entry_safe(cpr, tmp, &ins->ci_pool_list, cpr_link) { + cbk = &cpr->cpr_bk; + if (cbk->cb_phase < phase && + cbk->cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_CHECKING) { + cbk->cb_phase = phase; + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc1 = chk_bk_update_pool(cbk, uuid_str); + if (rc1 != 0) + rc = rc1; + } + chk_pool_put(cpr); + } + + return rc; +} + +int +chk_pool_handle_notify(struct chk_instance *ins, struct chk_iv *iv) +{ + struct chk_pool_rec *cpr = NULL; + struct chk_bookmark *cbk; + d_iov_t kiov; + d_iov_t riov; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc = 0; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, iv->ci_uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0) { + if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) + rc = -DER_NOTAPPLICABLE; + + D_GOTO(out, rc); + } + + cpr = (struct chk_pool_rec *)riov.iov_buf; + chk_pool_get(cpr); + cbk = &cpr->cpr_bk; + + if (cpr->cpr_stop || unlikely(iv->ci_phase < cbk->cb_phase)) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (cpr->cpr_done) + goto out; + + if (iv->ci_pool_status == CHK__CHECK_POOL_STATUS__CPS_CHECKED) { + cpr->cpr_done = 1; + if (iv->ci_pool_destroyed) { + cpr->cpr_destroyed = 1; + cpr->cpr_not_export_ps = 1; + } + } else if (iv->ci_pool_status == CHK__CHECK_POOL_STATUS__CPS_FAILED || + iv->ci_pool_status == CHK__CHECK_POOL_STATUS__CPS_IMPLICATED) { + cpr->cpr_skip = 1; + if (cpr->cpr_thread == ABT_THREAD_NULL) + cpr->cpr_done = 1; + } else if (iv->ci_pool_status != CHK__CHECK_POOL_STATUS__CPS_CHECKING) { + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + } + + if (!ins->ci_is_leader && !cpr->cpr_destroyed && cpr->cpr_done) { + if (iv->ci_pool_status == CHK__CHECK_POOL_STATUS__CPS_CHECKED && + !cpr->cpr_not_export_ps) { + chk_pool_start_svc(cpr, NULL); + } else if (ins->ci_sched_running && !ins->ci_sched_exiting) { + chk_pool_get(cpr); + 
d_list_add_tail(&cpr->cpr_shutdown_link, &ins->ci_pool_shutdown_list); + } + } + + if (iv->ci_phase != cbk->cb_phase || iv->ci_pool_status != cbk->cb_pool_status || + cpr->cpr_destroyed) { + cbk->cb_phase = iv->ci_phase; + cbk->cb_pool_status = iv->ci_pool_status; + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(cbk, uuid_str); + } + +out: + if (cpr != NULL) + chk_pool_put(cpr); + + return rc; +} + +int +chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, + struct chk_bookmark *bk, struct chk_instance *ins, + uint32_t *shard_nr, void *data, chk_pool_free_data_t free_cb, + struct chk_pool_rec **cpr) +{ + struct chk_pool_bundle rbund; + d_iov_t kiov; + d_iov_t riov; + d_iov_t viov; + int rc; + + rbund.cpb_head = head; + rbund.cpb_shard_nr = shard_nr; + uuid_copy(rbund.cpb_uuid, uuid); + rbund.cpb_rank = rank; + rbund.cpb_bk = bk; + rbund.cpb_ins = ins; + rbund.cpb_data = data; + rbund.cpb_free_cb = free_cb; + + d_iov_set(&riov, &rbund, sizeof(rbund)); + d_iov_set(&kiov, uuid, sizeof(uuid_t)); + d_iov_set(&viov, NULL, 0); + rc = dbtree_upsert(hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &kiov, &riov, &viov); + if (rc == 0 && cpr != NULL) + *cpr = (struct chk_pool_rec *)viov.iov_buf; + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, + "Add pool shard "DF_UUIDF" for rank %u: "DF_RC"\n", + DP_UUID(uuid), rank, DP_RC(rc)); + + return rc; +} + +void +chk_pool_shard_cleanup(struct chk_instance *ins) +{ + struct chk_pool_rec *cpr; + struct chk_pool_shard *cps; + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL); + D_ASSERT(d_list_empty(&cpr->cpr_pending_list)); + D_ASSERT(cpr->cpr_mbs == NULL); + + while ((cps = d_list_pop_entry(&cpr->cpr_shard_list, struct chk_pool_shard, + cps_link)) != NULL) { + if (cps->cps_free_cb != NULL) + cps->cps_free_cb(cps->cps_data); + else + D_FREE(cps->cps_data); + D_FREE(cps); + } + + cpr->cpr_shard_nr = 0; + } +} + +int +chk_pending_add(struct 
chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, + uint64_t seq, uint32_t rank, uint32_t cla, struct chk_pending_rec **cpr) +{ + struct chk_pending_bundle rbund; + d_iov_t kiov; + d_iov_t riov; + d_iov_t viov; + int rc; + + D_ASSERT(cpr != NULL); + + uuid_copy(rbund.cpb_uuid, uuid); + rbund.cpb_pool_head = pool_head; + rbund.cpb_rank_head = rank_head; + rbund.cpb_seq = seq; + rbund.cpb_rank = rank; + rbund.cpb_class = cla; + + d_iov_set(&viov, NULL, 0); + d_iov_set(&riov, &rbund, sizeof(rbund)); + d_iov_set(&kiov, &seq, sizeof(seq)); + + /* The access may from multiple XS (on check engine), so taking the lock firstly. */ + ABT_rwlock_wrlock(ins->ci_abt_lock); + rc = dbtree_upsert(ins->ci_pending_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, + &kiov, &riov, &viov); + if (rc == 0) { + *cpr = (struct chk_pending_rec *)viov.iov_buf; + (*cpr)->cpr_busy = 1; + } + ABT_rwlock_unlock(ins->ci_abt_lock); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, + "Add pending record with gen "DF_X64", seq "DF_X64", rank %u, class %u: "DF_RC"\n", + ins->ci_bk.cb_gen, seq, rank, cla, DP_RC(rc)); + + return rc; +} + +int +chk_pending_del(struct chk_instance *ins, uint64_t seq, bool locked, struct chk_pending_rec **cpr) +{ + d_iov_t kiov; + d_iov_t riov; + int rc; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &seq, sizeof(seq)); + + if (!locked) + ABT_rwlock_wrlock(ins->ci_abt_lock); + rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov); + if (!locked) + ABT_rwlock_unlock(ins->ci_abt_lock); + + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, + "Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n", + ins->ci_bk.cb_gen, seq, DP_RC(rc)); + + return rc; +} + +int +chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr) +{ + d_iov_t kiov; + d_iov_t riov; + int rc = 0; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &cpr->cpr_seq, sizeof(cpr->cpr_seq)); + 
rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov); + if (rc != 0) { + D_ASSERT(rc != -DER_NONEXIST); + + D_ERROR("Failed to remove pending rec for seq "DF_X64": "DF_RC"\n", + cpr->cpr_seq, DP_RC(rc)); + } else { + D_ASSERT(cpr == riov.iov_buf); + + ABT_mutex_lock(cpr->cpr_mutex); + if (cpr->cpr_busy) { + /* + * Notify the owner who is blocked on the pending record + * and will release the pending record after using it. + */ + cpr->cpr_exiting = 1; + ABT_cond_broadcast(cpr->cpr_cond); + ABT_mutex_unlock(cpr->cpr_mutex); + } else { + ABT_mutex_unlock(cpr->cpr_mutex); + chk_pending_destroy(cpr); + } + } + + return rc; +} + +void +chk_pending_destroy(struct chk_pending_rec *cpr) +{ + D_ASSERT(d_list_empty(&cpr->cpr_pool_link)); + D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); + + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + + D_FREE(cpr); +} + +int +chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase, + uint32_t policy_nr, struct chk_policy *policies, + d_rank_list_t *ranks, struct chk_property *prop) +{ + int rc = 0; + int i; + + prop->cp_leader = leader; + if (!(flags & CHK__CHECK_FLAG__CF_DRYRUN)) + prop->cp_flags &= ~CHK__CHECK_FLAG__CF_DRYRUN; + if (flags & CHK__CHECK_FLAG__CF_NO_FAILOUT) + prop->cp_flags &= ~CHK__CHECK_FLAG__CF_FAILOUT; + if (flags & CHK__CHECK_FLAG__CF_NO_AUTO) + prop->cp_flags &= ~CHK__CHECK_FLAG__CF_AUTO; + prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET | + CHK__CHECK_FLAG__CF_ORPHAN_POOL | + CHK__CHECK_FLAG__CF_NO_FAILOUT | + CHK__CHECK_FLAG__CF_NO_AUTO); + prop->cp_phase = phase; + if (ranks != NULL) + prop->cp_rank_nr = ranks->rl_nr; + + /* Reuse former policies if "policy_nr == 0". 
*/ + if (policy_nr > 0) { + memset(prop->cp_policies, 0, sizeof(Chk__CheckInconsistAction) * CHK_POLICY_MAX); + for (i = 0; i < policy_nr; i++) { + if (unlikely(policies[i].cp_class >= CHK_POLICY_MAX)) { + D_ERROR("Invalid DAOS inconsistency class %u\n", + policies[i].cp_class); + D_GOTO(out, rc = -DER_INVAL); + } + + prop->cp_policies[policies[i].cp_class] = policies[i].cp_action; + } + } + + rc = chk_prop_update(prop, ranks); + +out: + return rc; +} + +uint32_t +chk_pool_merge_status(uint32_t status_a, uint32_t status_b) +{ + if (status_a == CHK__CHECK_POOL_STATUS__CPS_PENDING || + status_b == CHK__CHECK_POOL_STATUS__CPS_PENDING) + return CHK__CHECK_POOL_STATUS__CPS_PENDING; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_CHECKING || + status_b == CHK__CHECK_POOL_STATUS__CPS_CHECKING) + return CHK__CHECK_POOL_STATUS__CPS_CHECKING; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_FAILED || + status_b == CHK__CHECK_POOL_STATUS__CPS_FAILED) + return CHK__CHECK_POOL_STATUS__CPS_FAILED; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_STOPPED || + status_b == CHK__CHECK_POOL_STATUS__CPS_STOPPED) + return CHK__CHECK_POOL_STATUS__CPS_STOPPED; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_IMPLICATED || + status_b == CHK__CHECK_POOL_STATUS__CPS_IMPLICATED) + return CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_CHECKED || + status_b == CHK__CHECK_POOL_STATUS__CPS_CHECKED) + return CHK__CHECK_POOL_STATUS__CPS_CHECKED; + + if (status_a == CHK__CHECK_POOL_STATUS__CPS_PAUSED || + status_b == CHK__CHECK_POOL_STATUS__CPS_PAUSED) + return CHK__CHECK_POOL_STATUS__CPS_PAUSED; + + D_ASSERTF(status_a == CHK__CHECK_POOL_STATUS__CPS_UNCHECKED && + status_b == CHK__CHECK_POOL_STATUS__CPS_UNCHECKED, + "Invalid pool status: %u/%u\n", status_a, status_b); + + return CHK__CHECK_POOL_STATUS__CPS_UNCHECKED; +} + +void +chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_dst, + uint32_t phase_src, uint64_t *gen_dst, uint64_t 
gen_src) +{ + /* + * Current leader may be not the latest check instance leader, some engine(s) may not take + * part in related old check at that time. Here, we want to query the latest check status, + * so we always trust the latest generation unless the old instance is still running. + */ + + if (*gen_dst < gen_src) { + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_RUNNING) + return; + goto trust_all; + } + + + if (*gen_dst > gen_src) { + if (status_src == CHK__CHECK_INST_STATUS__CIS_RUNNING) + goto trust_all; + return; + } + + if (*phase_dst > phase_src) + *phase_dst = phase_src; + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_RUNNING) + return; + + if (status_src == CHK__CHECK_INST_STATUS__CIS_RUNNING) + goto trust_status; + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_FAILED) + return; + + if (status_src == CHK__CHECK_INST_STATUS__CIS_FAILED) + goto trust_status; + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_STOPPED) + return; + + if (status_src == CHK__CHECK_INST_STATUS__CIS_STOPPED) + goto trust_status; + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_IMPLICATED) + return; + + if (status_src == CHK__CHECK_INST_STATUS__CIS_IMPLICATED) + goto trust_status; + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_COMPLETED) { + /* + * There is race with check query when set 'COMPLETED' status on multiple check + * engines. Under such case, set phase in check query result as 'DONE' to avoid + * confused 'non-DONE' phase + 'COMPLETED' status. + */ + *phase_dst = CHK__CHECK_SCAN_PHASE__CSP_DONE; + return; + } + + if (status_src == CHK__CHECK_INST_STATUS__CIS_COMPLETED) { + /* + * There is race with check query when set 'COMPLETED' status on multiple check + * engines. Under such case, set phase in check query result as 'DONE' to avoid + * confused 'non-DONE' phase + 'COMPLETED' status. 
+ */ + *phase_dst = CHK__CHECK_SCAN_PHASE__CSP_DONE; + goto trust_status; + } + + if (*status_dst == CHK__CHECK_INST_STATUS__CIS_PAUSED) + return; + + if (status_src == CHK__CHECK_INST_STATUS__CIS_PAUSED) + goto trust_status; + + D_ASSERTF(*status_dst == CHK__CHECK_INST_STATUS__CIS_INIT && + status_src == CHK__CHECK_INST_STATUS__CIS_INIT, + "Invalid ins status: %u/%u\n", *status_dst, status_src); + + return; + +trust_all: + *phase_dst = phase_src; + *gen_dst = gen_src; + +trust_status: + *status_dst = status_src; +} + +int +chk_ins_init(struct chk_instance **p_ins) +{ + struct chk_instance *ins = NULL; + int rc = 0; + + D_ASSERT(p_ins != NULL); + + D_ALLOC_PTR(ins); + if (ins == NULL) + D_GOTO(out_init, rc = -DER_NOMEM); + + ins->ci_sched = ABT_THREAD_NULL; + + ins->ci_rank_hdl = DAOS_HDL_INVAL; + D_INIT_LIST_HEAD(&ins->ci_rank_list); + + ins->ci_pool_hdl = DAOS_HDL_INVAL; + D_INIT_LIST_HEAD(&ins->ci_pool_list); + + ins->ci_pending_hdl = DAOS_HDL_INVAL; + D_INIT_LIST_HEAD(&ins->ci_pool_shutdown_list); + + rc = ABT_rwlock_create(&ins->ci_abt_lock); + if (rc != ABT_SUCCESS) + D_GOTO(out_init, rc = dss_abterr2der(rc)); + + rc = ABT_mutex_create(&ins->ci_abt_mutex); + if (rc != ABT_SUCCESS) + D_GOTO(out_lock, rc = dss_abterr2der(rc)); + + rc = ABT_cond_create(&ins->ci_abt_cond); + if (rc != ABT_SUCCESS) + D_GOTO(out_mutex, rc = dss_abterr2der(rc)); + + D_INIT_LIST_HEAD(&ins->ci_dead_ranks); + + D_GOTO(out_init, rc = 0); + +out_mutex: + ABT_mutex_free(&ins->ci_abt_mutex); +out_lock: + ABT_rwlock_free(&ins->ci_abt_lock); +out_init: + if (rc == 0) + *p_ins = ins; + + return rc; +} + +void +chk_ins_fini(struct chk_instance **p_ins) +{ + struct chk_instance *ins; + + D_ASSERT(p_ins != NULL); + + ins = *p_ins; + if (ins == NULL) + return; + + ins->ci_inited = 0; + chk_iv_ns_cleanup(&ins->ci_iv_ns); + + if (ins->ci_iv_group != NULL) + crt_group_secondary_destroy(ins->ci_iv_group); + + d_rank_list_free(ins->ci_ranks); + D_ASSERT(d_list_empty(&ins->ci_dead_ranks)); + + 
D_ASSERT(daos_handle_is_inval(ins->ci_rank_hdl)); + D_ASSERT(d_list_empty(&ins->ci_rank_list)); + + D_ASSERT(daos_handle_is_inval(ins->ci_pool_hdl)); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); + + D_ASSERT(daos_handle_is_inval(ins->ci_pending_hdl)); + D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list)); + + if (ins->ci_sched != ABT_THREAD_NULL) + ABT_thread_free(&ins->ci_sched); + + if (ins->ci_abt_cond != ABT_COND_NULL) + ABT_cond_free(&ins->ci_abt_cond); + + if (ins->ci_abt_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&ins->ci_abt_mutex); + + if (ins->ci_abt_lock != ABT_RWLOCK_NULL) + ABT_rwlock_free(&ins->ci_abt_lock); + + D_FREE(ins); + *p_ins = NULL; +} diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c new file mode 100644 index 00000000000..bdb142ea8bc --- /dev/null +++ b/src/chk/chk_engine.c @@ -0,0 +1,3516 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "chk.pb-c.h" +#include "chk_internal.h" + +#define DF_ENGINE "Check engine (gen: "DF_X64")" +#define DP_ENGINE(ins) (ins)->ci_bk.cb_gen + +static struct chk_instance *chk_engine; + +struct chk_query_pool_args { + struct chk_instance *cqpa_ins; + uint32_t cqpa_cap; + uint32_t cqpa_idx; + struct chk_query_pool_shard *cqpa_shards; +}; + +struct chk_cont_list_args { + uuid_t ccla_pool; + uint32_t ccla_cap; + uint32_t ccla_idx; + uuid_t *ccla_conts; +}; + +struct chk_cont_list_aggregator { + uuid_t ccla_pool; + d_list_t ccla_list; + daos_handle_t ccla_toh; + struct btr_root ccla_btr; + uint32_t ccla_count; +}; + +struct chk_cont_rec { + d_list_t ccr_link; + uuid_t ccr_uuid; + struct chk_cont_list_aggregator *ccr_aggregator; + daos_prop_t *ccr_label_prop; + d_iov_t ccr_label_cs; + uint32_t ccr_label_checked:1, + ccr_skip:1; +}; + +struct chk_cont_bundle { + 
struct chk_cont_list_aggregator *ccb_aggregator; + uuid_t ccb_uuid; +}; + +struct chk_cont_label_cb_args { + struct chk_cont_list_aggregator *cclca_aggregator; + struct cont_svc *cclca_svc; + struct chk_pool_rec *cclca_cpr; +}; + +struct chk_pool_mbs_args { + struct ds_pool_svc *cpma_svc; + struct chk_pool_rec *cpma_cpr; +}; + +static int chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision); + +static int +chk_cont_hkey_size(void) +{ + return sizeof(uuid_t); +} + +static void +chk_cont_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey) +{ + D_ASSERT(key_iov->iov_len == sizeof(uuid_t)); + + memcpy(hkey, key_iov->iov_buf, key_iov->iov_len); +} + +static int +chk_cont_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, + struct btr_record *rec, d_iov_t *val_out) +{ + struct chk_cont_bundle *ccb = val_iov->iov_buf; + struct chk_cont_rec *ccr; + int rc = 0; + + D_ALLOC_PTR(ccr); + if (ccr == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + uuid_copy(ccr->ccr_uuid, ccb->ccb_uuid); + ccr->ccr_aggregator = ccb->ccb_aggregator; + d_list_add_tail(&ccr->ccr_link, &ccb->ccb_aggregator->ccla_list); + ccb->ccb_aggregator->ccla_count++; + + rec->rec_off = umem_ptr2off(&tins->ti_umm, ccr); + +out: + return rc; +} + +static int +chk_cont_free(struct btr_instance *tins, struct btr_record *rec, void *args) +{ + struct chk_cont_rec *ccr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + + rec->rec_off = UMOFF_NULL; + + ccr->ccr_aggregator->ccla_count--; + d_list_del(&ccr->ccr_link); + daos_prop_free(ccr->ccr_label_prop); + daos_iov_free(&ccr->ccr_label_cs); + D_FREE(ccr); + + return 0; +} + +static int +chk_cont_fetch(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key_iov, d_iov_t *val_iov) +{ + struct chk_cont_rec *ccr; + + D_ASSERT(val_iov != NULL); + + ccr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_set(val_iov, ccr, sizeof(*ccr)); + + return 0; +} + +static int +chk_cont_update(struct btr_instance *tins, struct 
btr_record *rec, + d_iov_t *key, d_iov_t *val, d_iov_t *val_out) +{ + return 0; +} + +btr_ops_t chk_cont_ops = { + .to_hkey_size = chk_cont_hkey_size, + .to_hkey_gen = chk_cont_hkey_gen, + .to_rec_alloc = chk_cont_alloc, + .to_rec_free = chk_cont_free, + .to_rec_fetch = chk_cont_fetch, + .to_rec_update = chk_cont_update, +}; + +static void +chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_status, + uint32_t pool_status) +{ + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pool_rec *cpr; + struct chk_iv iv = { 0 }; + int rc; + + ins->ci_sched_exiting = 1; + + while ((cpr = d_list_pop_entry(&ins->ci_pool_shutdown_list, struct chk_pool_rec, + cpr_shutdown_link)) != NULL) { + chk_pool_shutdown(cpr, false); + chk_pool_put(cpr); + } + + chk_pool_stop_all(ins, pool_status, NULL); + + chk_destroy_pending_tree(ins); + chk_destroy_pool_tree(ins); + + if (DAOS_FAIL_CHECK(DAOS_CHK_ENGINE_DEATH)) + goto out; + + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + cbk->cb_ins_status = ins_status; + if (ins_phase != CHK_INVAL_PHASE) + cbk->cb_phase = ins_phase; + cbk->cb_time.ct_stop_time = time(NULL); + rc = chk_bk_update_engine(cbk); + if (rc != 0) + D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n", + DP_ENGINE(ins), DP_RC(rc)); + } + + if (ins_status != CHK__CHECK_INST_STATUS__CIS_PAUSED && + ins_status != CHK__CHECK_INST_STATUS__CIS_STOPPED && + ins_status != CHK__CHECK_INST_STATUS__CIS_IMPLICATED && ins->ci_iv_ns != NULL) { + if (DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_LEADER)) + goto out; + + iv.ci_gen = cbk->cb_gen; + iv.ci_phase = cbk->cb_phase; + iv.ci_ins_status = ins_status; + iv.ci_to_leader = 1; + + /* Notify the leader that check instance exit on the engine. 
*/ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, + CRT_IV_SYNC_NONE, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u notify leader for its exit, status %u: rc = %d\n", + DP_ENGINE(ins), dss_self_rank(), ins_status, rc); + } + +out: + ins->ci_sched_exiting = 0; +} + +static int +chk_engine_post_repair(struct chk_pool_rec *cpr, int *result, bool update) +{ + struct chk_bookmark *cbk = &cpr->cpr_bk; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc = 0; + + if (unlikely(*result > 0)) + *result = 0; + + if (*result != 0) { + if (cpr->cpr_ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) { + cbk->cb_time.ct_stop_time = time(NULL); + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; + } else { + *result = 0; + } + } + + if (update) { + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(cbk, uuid_str); + } + + return *result != 0 ? *result : rc; +} + +static int +chk_engine_pm_orphan(struct chk_pool_rec *cpr, d_rank_t rank, int index) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + d_rank_list_t ranks = { 0 }; + struct chk_report_unit cru = { 0 }; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + /* + * NOTE: The subsequent check after handling orphan pm entry will not access the + * orphan pm entry. So here even if we failed to handle the orphan pm entry, + * it will not affect the subsequent check. Then does not set cpr_skip for + * this case. 
+ */ + if (index < 0) + cla = CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_NONEXIST_IN_MAP; + else + cla = CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_DOWN_IN_MAP; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * If the rank does not exists in the pool map, then destroy the orphan pool rank + * to release space by default. + * + * NOTE: Currently, we does not support to add the orphan pool rank into the pool + * map. If want to add them, it can be done via pool extend after DAOS check. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + if (index < 0) { + ranks.rl_ranks = &rank; + ranks.rl_nr = 1; + result = ds_mgmt_tgt_pool_destroy_ranks(cpr->cpr_uuid, &ranks); + } else { + result = ds_mgmt_tgt_pool_shard_destroy(cpr->cpr_uuid, index, rank); + } + + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + } else { + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 2; + + strs[0] = "Discard the orphan pool shard to release space [suggested]."; + strs[1] = "Keep the orphan pool shard on engine, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + } + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects orphan %s entry in pool map for " + DF_UUIDF", rank %u, index %d\n", + index < 0 ? "rank" : "target", DP_UUID(cpr->cpr_uuid), rank, index); + cru.cru_msg = msg; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_engine_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects orphan %s entry in pool map for " + DF_UUIDF", rank %u, index %d, action %u (%s), handle_rc %d, " + "report_rc %d, decision %d\n", + DP_ENGINE(ins), index < 0 ? "rank" : "target", DP_UUID(cpr->cpr_uuid), rank, + index, act, option_nr ? 
"need interact" : "no interact", result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + D_ERROR(DF_ENGINE" got invalid decision %d for orphan %s entry in pool map " + "for pool "DF_UUIDF", rank %u, index %d. Ignore the inconsistency.\n", + DP_ENGINE(ins), decision, index < 0 ? "rank" : "target", + DP_UUID(cpr->cpr_uuid), rank, index); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + if (index < 0) { + ranks.rl_ranks = &rank; + ranks.rl_nr = 1; + result = ds_mgmt_tgt_pool_destroy_ranks(cpr->cpr_uuid, &ranks); + } else { + result = ds_mgmt_tgt_pool_shard_destroy(cpr->cpr_uuid, index, rank); + } + + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + } + + goto report; + +out: + return chk_engine_post_repair(cpr, &result, rc <= 0); +} + +static int +chk_engine_pm_dangling(struct chk_pool_rec *cpr, struct pool_map *map, struct pool_component *comp, + uint32_t status) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = { 0 }; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char suggested[CHK_MSG_BUFLEN] = { 0 }; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int 
decision = -1; + int result = 0; + int rc = 0; + + D_ASSERTF(status == PO_COMP_ST_DOWNOUT || status == PO_COMP_ST_DOWN, + "Unexpected pool map status %u\n", status); + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_ENGINE_HAS_NO_STORAGE; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * If the target does not has storage on the engine, then mark it as 'DOWN' or + * 'DOWNOUT' in the pool map by default. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + /* + * NOTE: For dryrun mode, we will not persistently store the change in + * subsequent step. Here we only fix the inconsistency in DRAM. + */ + cpr->cpr_map_refreshed = 1; + comp->co_status = status; + comp->co_fseq = pool_map_bump_version(map); + cbk->cb_statistics.cs_repaired++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + /* + * For the pool with dangling map entry, if not repair, then the subsequent + * check (based on pool map) may fail, then have to skip to avoid confusing. + */ + cpr->cpr_skip = 1; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + cpr->cpr_skip = 1; + } else { + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 2; + + snprintf(suggested, CHK_MSG_BUFLEN - 1, + "Change pool map for the dangling map entry as %s [suggested].", + pool_map_status2name(status)); + strs[0] = suggested; + strs[1] = "Keep the dangling map entry in pool map, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + } + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects dangling %s entry in pool map for pool " + DF_UUIDF", rank %u, index %u, (want) mark as %s\n", + comp->co_type == PO_COMP_TP_RANK ? "rank" : "target", + DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, + pool_map_status2name(status)); + cru.cru_msg = msg; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_engine_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects dangling %s entry in pool map for pool " + DF_UUIDF" rank %u, index %u, action %u (%s), handle_rc %d, report_rc %d, " + "decision %d, (want) mark as %s\n", + DP_ENGINE(ins), comp->co_type == PO_COMP_TP_RANK ? "rank" : "target", + DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, act, + option_nr ? 
"need interact" : "no interact", result, rc, decision, + pool_map_status2name(status)); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + /* + * Skip the pool with dangling pm entry if failed to interact with admin for + * further action. + */ + cpr->cpr_skip = 1; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + D_ERROR(DF_ENGINE" got invalid decision %d for dangling %s entry in pool map " + "for pool "DF_UUIDF", rank %u, index %u, (want) mark as %s. Ignore.\n", + DP_ENGINE(ins), decision, + comp->co_type == PO_COMP_TP_RANK ? "rank" : "target", + DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, + pool_map_status2name(status)); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + /* + * NOTE: For dryrun mode, we will not persistently store the change in + * subsequent step. Here we only fix the inconsistency in DRAM. 
+ */ + cpr->cpr_map_refreshed = 1; + comp->co_status = status; + comp->co_fseq = pool_map_bump_version(map); + cbk->cb_statistics.cs_repaired++; + break; + } + + goto report; + +out: + return chk_engine_post_repair(cpr, &result, rc <= 0); +} + +static int +chk_engine_pm_unknown_target(struct chk_pool_rec *cpr, struct pool_component *comp) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = 0; + int rc; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN; + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_total++; + cbk->cb_statistics.cs_ignored++; + /* Skip the pool with unknown pm entry. */ + cpr->cpr_skip = 1; + + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = act; + cru.cru_rank = dss_self_rank(); + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects unknown target entry in pool map for pool " + DF_UUIDF", rank %u, index %u, status %u, skip it. 
You can change " + "its status via DAOS debug tool if it is not for downgraded case.\n", + DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, comp->co_status); + cru.cru_msg = msg; + cru.cru_result = 0; + + rc = chk_engine_report(&cru, &seq, NULL); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects unknown target entry in pool map for pool "DF_UUIDF", rank %u, " + "target %u, action %u (no interact), handle_rc 0, report_rc %d, decision 0\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, act, rc); + + return chk_engine_post_repair(cpr, &rc, rc <= 0); +} + +static int +chk_engine_pool_mbs_one(struct chk_pool_rec *cpr, struct pool_map *map, struct chk_pool_mbs *mbs) +{ + struct pool_domain *dom; + struct pool_component *comp; + int i; + int rc = 0; + bool unknown; + + dom = pool_map_find_node_by_rank(map, mbs->cpm_rank); + if (dom == NULL) { + D_ASSERT(mbs->cpm_rank != dss_self_rank()); + + rc = chk_engine_pm_orphan(cpr, mbs->cpm_rank, -1); + goto out; + } + + for (i = 0; i < dom->do_target_nr; i++) { + comp = &dom->do_targets[i].ta_comp; + unknown = false; + + switch (comp->co_status) { + case PO_COMP_ST_DOWN: + /* + * NOTE: In the future, we may support to add the target (if exist) back. + * + * Fall through. + */ + case PO_COMP_ST_DOWNOUT: + if (comp->co_index < mbs->cpm_tgt_nr && + (mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_EMPTY || + mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_NORMAL)) + rc = chk_engine_pm_orphan(cpr, mbs->cpm_rank, comp->co_index); + /* + * Otherwise if the down/downout entry only exists in pool map, + * then it is useless, do nothing. + */ + break; + case PO_COMP_ST_NEW: + if (comp->co_index >= mbs->cpm_tgt_nr || + mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_NONEXIST || + mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_EMPTY) + /* Dangling new entry in pool map, directly mark as 'DOWNOUT'. 
*/ + rc = chk_engine_pm_dangling(cpr, map, comp, PO_COMP_ST_DOWNOUT); + break; + default: + D_WARN(DF_ENGINE" hit knownn pool target status %u for "DF_UUIDF + " with rank %u, index %u, ID %u\n", + DP_ENGINE(cpr->cpr_ins), comp->co_status, DP_UUID(cpr->cpr_uuid), + mbs->cpm_rank, comp->co_index, comp->co_id); + unknown = true; + /* Fall through. */ + case PO_COMP_ST_UP: + case PO_COMP_ST_UPIN: + case PO_COMP_ST_DRAIN: + if (comp->co_index >= mbs->cpm_tgt_nr || + mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_NONEXIST || + mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_EMPTY) + /* + * Some data may be on the lost target, mark as 'DOWN' that + * will be handled via rebuild in subsequent process. + */ + rc = chk_engine_pm_dangling(cpr, map, comp, PO_COMP_ST_DOWN); + else if (mbs->cpm_tgt_status[comp->co_index] == DS_POOL_TGT_NORMAL && + unknown) + /* + * NOTE: The unknown status maybe because of downgraded from new + * layout? It is better to keep it there with reporting it + * to admin who can adjust the status via DAOS debug tool. + */ + rc = chk_engine_pm_unknown_target(cpr, comp); + break; + } + + if (rc != 0) + goto out; + + /* + * Set the target status as DS_POOL_TGT_NONEXIST in + * DRAM to bypass the subsequent orphan entry check. + */ + if (comp->co_index < mbs->cpm_tgt_nr) + mbs->cpm_tgt_status[comp->co_index] = DS_POOL_TGT_NONEXIST; + + comp->co_flags |= PO_COMPF_CHK_DONE; + } + + dom->do_comp.co_flags |= PO_COMPF_CHK_DONE; + + for (i = 0; i < mbs->cpm_tgt_nr; i++) { + /* + * All checked cpm_tgt_status[x] have been marked as 'DS_POOL_TGT_NONEXIST' + * in above for() block. So here, these left ones must be orphan targets. 
+ */ + if (mbs->cpm_tgt_status[i] == DS_POOL_TGT_EMPTY || + mbs->cpm_tgt_status[i] == DS_POOL_TGT_NORMAL) { + rc = chk_engine_pm_orphan(cpr, mbs->cpm_rank, i); + if (rc != 0) + goto out; + } + } + +out: + return rc; +} + +static int +chk_engine_find_dangling_pm(struct chk_pool_rec *cpr, struct pool_map *map) +{ + struct pool_domain *doms = NULL; + struct pool_component *r_comp; + struct pool_component *t_comp; + int rank_nr; + int rc = 0; + int i; + int j; + bool down; + + rank_nr = pool_map_find_nodes(map, PO_COMP_ID_ALL, &doms); + if (rank_nr <= 0) + D_GOTO(out, rc = rank_nr); + + for (i = 0; i < rank_nr; i++) { + r_comp = &doms[i].do_comp; + if (r_comp->co_flags & PO_COMPF_CHK_DONE || + r_comp->co_status == PO_COMP_ST_DOWN || r_comp->co_status == PO_COMP_ST_DOWNOUT) + continue; + + down = false; + + for (j = 0; j < doms[i].do_target_nr; j++) { + t_comp = &doms[i].do_targets[j].ta_comp; + + switch (t_comp->co_status) { + case PO_COMP_ST_DOWN: + down = true; + break; + case PO_COMP_ST_DOWNOUT: + /* Do nothing. */ + break; + case PO_COMP_ST_NEW: + /* Dangling new entry in pool map, directly mark as 'DOWNOUT'. */ + rc = chk_engine_pm_dangling(cpr, map, t_comp, PO_COMP_ST_DOWNOUT); + break; + default: + D_WARN(DF_ENGINE" hit knownn pool target status %u for "DF_UUIDF + " with rank %u, index %u, ID %u\n", DP_ENGINE(cpr->cpr_ins), + t_comp->co_status, DP_UUID(cpr->cpr_uuid), + t_comp->co_rank, t_comp->co_index, t_comp->co_id); + /* Fall through. */ + case PO_COMP_ST_UP: + case PO_COMP_ST_UPIN: + case PO_COMP_ST_DRAIN: + down = true; + /* + * Some data may be on the lost target, mark as 'DOWN' that + * will be handled via rebuild in subsequent process. + */ + rc = chk_engine_pm_dangling(cpr, map, t_comp, PO_COMP_ST_DOWN); + break; + } + + if (rc != 0) + goto out; + + t_comp->co_flags |= PO_COMPF_CHK_DONE; + } + + /* dangling parent domain. */ + rc = chk_engine_pm_dangling(cpr, map, r_comp, + down ? 
PO_COMP_ST_DOWN : PO_COMP_ST_DOWNOUT); + if (rc != 0) + goto out; + + r_comp->co_flags |= PO_COMPF_CHK_DONE; + } + +out: + return rc; +} + +static int +chk_engine_bad_pool_label(struct chk_pool_rec *cpr, struct ds_pool_svc *svc) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + daos_prop_t *label = NULL; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = cpr->cpr_label_seq; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_LABEL; + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS; + cbk->cb_statistics.cs_total++; + + rc = ds_pool_prop_fetch(ds_pool_svc2pool(svc), DAOS_PO_QUERY_PROP_LABEL, &label); + if (rc != 0 && rc != -DER_NONEXIST) + D_GOTO(report, result = rc); + + if (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_pool_svc_update_label(svc, cpr->cpr_label); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = act; + cru.cru_rank = dss_self_rank(); + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects corrupted pool label: %s (MS) vs %s (PS).\n", + cpr->cpr_label != NULL ? cpr->cpr_label : "(null)", + label != NULL ? label->dpp_entries[0].dpe_str : "(null)"); + cru.cru_msg = msg; + cru.cru_result = result; + + rc = chk_engine_report(&cru, &seq, NULL); + + D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects corrupted label %s (MS) vs %s (PS) for pool " + DF_UUIDF", action %u (no interact), handle_rc %d, report_rc %d\n", + DP_ENGINE(ins), cpr->cpr_label != NULL ? cpr->cpr_label : "(null)", + label != NULL ? 
label->dpp_entries[0].dpe_str : "(null)", + DP_UUID(cpr->cpr_uuid), act, result, rc); + + /* + * It is not fatal even if failed to repair inconsistent pool label, + * then do not skip current pool for subsequent DAOS check. + */ + + daos_prop_free(label); + + return chk_engine_post_repair(cpr, &result, rc <= 0); +} + +static int +chk_engine_cont_list_init(uuid_t pool, struct chk_cont_list_aggregator *aggregator) +{ + struct umem_attr uma = { 0 }; + + uma.uma_id = UMEM_CLASS_VMEM; + uuid_copy(aggregator->ccla_pool, pool); + D_INIT_LIST_HEAD(&aggregator->ccla_list); + + return dbtree_create_inplace(DBTREE_CLASS_CHK_CONT, 0, CHK_BTREE_ORDER, &uma, + &aggregator->ccla_btr, &aggregator->ccla_toh); +} + +static void +chk_engine_cont_list_fini(struct chk_cont_list_aggregator *aggregator) +{ + if (daos_handle_is_valid(aggregator->ccla_toh)) { + dbtree_destroy(aggregator->ccla_toh, NULL); + aggregator->ccla_toh = DAOS_HDL_INVAL; + } +} + +static int +chk_engine_cont_list_reduce_internal(struct chk_cont_list_aggregator *aggregator, + uuid_t *conts, uint32_t count) +{ + struct chk_cont_bundle ccb = { 0 }; + d_iov_t kiov; + d_iov_t riov; + int i; + int rc = 0; + + ccb.ccb_aggregator = aggregator; + d_iov_set(&riov, &ccb, sizeof(ccb)); + + for (i = 0; i < count; i++) { + uuid_copy(ccb.ccb_uuid, conts[i]); + d_iov_set(&kiov, conts[i], sizeof(uuid_t)); + rc = dbtree_upsert(aggregator->ccla_toh, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, + &kiov, &riov, NULL); + if (rc != 0) { + D_ERROR("Failed to upsert "DF_UUIDF"/"DF_UUIDF" for cont list: "DF_RC"\n", + DP_UUID(aggregator->ccla_pool), DP_UUID(conts[i]), DP_RC(rc)); + break; + } + } + + return rc; +} + +static int +chk_engine_cont_list_remote_cb(struct chk_co_rpc_cb_args *cb_args) +{ + return chk_engine_cont_list_reduce_internal(cb_args->cb_priv, cb_args->cb_data, + cb_args->cb_nr); +} + +static int +chk_engine_cont_orphan(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, struct cont_svc *svc) +{ + struct chk_instance *ins = 
cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = { 0 }; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_CONT_NONEXIST_ON_PS; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * If the container is not registered to the container service, then destroy the + * orphan container to release space by default. + * + * NOTE: Currently, we do not support to add the orphan container back to the CS, + * that may be implemented in the future when we have enough information to + * recover necessary prop/attr for the orphan container. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_cont_destroy_orphan(svc, ccr->ccr_uuid); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + } else { + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 2; + + strs[0] = "Destroy the orphan container to release space [suggested]."; + strs[1] = "Keep the orphan container on engines, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + } + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + cru.cru_cont = (uuid_t *)&ccr->ccr_uuid; + if (ccr->ccr_label_prop != NULL) + cru.cru_cont_label = ccr->ccr_label_prop->dpp_entries[0].dpe_str; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects orphan container "DF_UUIDF"/"DF_UUIDF"\n", + DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid)); + cru.cru_msg = msg; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_engine_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects orphan container " + DF_UUIDF"/"DF_UUIDF", action %u (%s), handle_rc %d, report_rc %d, decision %d\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid), act, + option_nr ? 
"need interact" : "no interact", result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + D_ERROR(DF_ENGINE" got invalid decision %d for orphan container " + DF_UUIDF"/"DF_UUIDF". Ignore the inconsistency.\n", + DP_ENGINE(ins), decision, DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid)); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_cont_destroy_orphan(svc, ccr->ccr_uuid); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + } + + goto report; + +out: + /* NOTE: For orphan container, mark it as 'skip' since we do not support to add it back. 
*/ + ccr->ccr_skip = 1; + + return chk_engine_post_repair(cpr, &result, rc <= 0); +} + +static daos_prop_t * +chk_engine_build_label_prop(d_iov_t *label) +{ + daos_prop_t *prop; + + prop = daos_prop_alloc(1); + if (prop != NULL) { + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_LABEL; + D_STRNDUP(prop->dpp_entries[0].dpe_str, label->iov_buf, label->iov_len); + if (prop->dpp_entries[0].dpe_str == NULL) { + daos_prop_free(prop); + prop = NULL; + } + } + + return prop; +} + +static inline bool +chk_engine_cont_target_label_empty(struct chk_cont_rec *ccr) +{ + if (ccr->ccr_label_prop == NULL) + return true; + + if (strncmp(DAOS_PROP_NO_CO_LABEL, ccr->ccr_label_prop->dpp_entries[0].dpe_str, + DAOS_PROP_LABEL_MAX_LEN) == 0) + return true; + + return false; +} + +static inline bool +chk_engine_cont_cs_label_empty(struct chk_cont_rec *ccr) +{ + if (daos_iov_empty(&ccr->ccr_label_cs)) + return true; + + if (strncmp(DAOS_PROP_NO_CO_LABEL, ccr->ccr_label_cs.iov_buf, DAOS_PROP_LABEL_MAX_LEN) == 0) + return true; + + return false; +} + +/* + * Choose whether to trust the label in container service or in the container property. + * + * \return 1: trust container service (its label is not empty). + * \return 0: neither label is trustable (both are empty); the labels are not compared here. + * \return -1: trust container property (only the property label is not empty). + */ +static inline int +chk_engine_cont_choose_label(struct chk_cont_rec *ccr) +{ + bool trust_cs = true; + bool trust_target = true; + + if (chk_engine_cont_cs_label_empty(ccr)) + trust_cs = false; + + if (chk_engine_cont_target_label_empty(ccr)) + trust_target = false; + + if (!trust_cs && !trust_target) + return 0; + + /* + * If the container label in the container service (cont_svc::cs_uuids) + * exists but does not match the label in the container property, then + * trust the container service and reset the one in container property + * by default. 
+ */ + if (trust_cs) + return 1; + + return -1; +} + +static inline char * +chk_engine_ccr2label(struct chk_cont_rec *ccr, bool prefer_target) +{ + if (prefer_target) { + if (ccr->ccr_label_prop != NULL) + return ccr->ccr_label_prop->dpp_entries[0].dpe_str; + + if (!daos_iov_empty(&ccr->ccr_label_cs)) + return ccr->ccr_label_cs.iov_buf; + } else { + if (!daos_iov_empty(&ccr->ccr_label_cs)) + return ccr->ccr_label_cs.iov_buf; + + if (ccr->ccr_label_prop != NULL) + return ccr->ccr_label_prop->dpp_entries[0].dpe_str; + } + + return NULL; +} + +static int +chk_engine_cont_set_label(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, struct cont_svc *svc) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + daos_prop_t *prop_tmp = NULL; + struct chk_report_unit cru = { 0 }; + char strs[3][CHK_MSG_BUFLEN]; + d_iov_t iovs[3]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + char *label = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = { 0 }; + uint64_t seq = 0; + uint32_t options[3]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_CONT_BAD_LABEL; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + rc = chk_engine_cont_choose_label(ccr); + if (rc > 0) + goto trust_ps; + + if (rc < 0) + goto trust_target; + + goto out; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + rc = chk_engine_cont_choose_label(ccr); + if (unlikely(rc == 0)) + goto out; + + if (rc < 0) + goto interact; + +trust_ps: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + label = chk_engine_ccr2label(ccr, false); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + prop_tmp = chk_engine_build_label_prop(&ccr->ccr_label_cs); + if (prop_tmp == 
NULL) + D_GOTO(out, result = -DER_NOMEM); + + result = ds_cont_set_label(svc, ccr->ccr_uuid, prop_tmp, + ccr->ccr_label_prop, false); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET: + rc = chk_engine_cont_choose_label(ccr); + if (unlikely(rc == 0)) + goto out; + + if (rc > 0 && chk_engine_cont_target_label_empty(ccr)) + goto interact; + +trust_target: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + label = chk_engine_ccr2label(ccr, true); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + if (!daos_iov_empty(&ccr->ccr_label_cs)) { + prop_tmp = chk_engine_build_label_prop(&ccr->ccr_label_cs); + if (prop_tmp == NULL) + D_GOTO(out, result = -DER_NOMEM); + } + + result = ds_cont_set_label(svc, ccr->ccr_uuid, + ccr->ccr_label_prop, prop_tmp, true); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + label = chk_engine_ccr2label(ccr, false); + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + rc = chk_engine_cont_choose_label(ccr); + if (unlikely(rc == 0)) + goto out; + +interact: + label = chk_engine_ccr2label(ccr, false); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + } + + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + snprintf(strs[1], CHK_MSG_BUFLEN - 1, + "Keep the inconsistent container label: %s (CS) vs %s (property), " + "repair nothing.", daos_iov_empty(&ccr->ccr_label_cs) ? "(null)" : + (char *)ccr->ccr_label_cs.iov_buf, ccr->ccr_label_prop != NULL ? + (char *)ccr->ccr_label_prop->dpp_entries[0].dpe_str : "(null)"); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + if (rc > 0) { + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + snprintf(strs[0], CHK_MSG_BUFLEN - 1, + "Trust the container label %s in container service [suggested].", + (char *)ccr->ccr_label_cs.iov_buf); + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + + if (chk_engine_cont_target_label_empty(ccr)) { + option_nr = 2; + sgl.sg_nr = 2; + } else { + options[2] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + snprintf(strs[2], CHK_MSG_BUFLEN - 1, + "Trust the container label %s in container property.", + (char *)ccr->ccr_label_prop->dpp_entries[0].dpe_str); + d_iov_set(&iovs[2], strs[2], strlen(strs[2])); + option_nr = 3; + sgl.sg_nr = 3; + } + } else { + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + snprintf(strs[0], CHK_MSG_BUFLEN - 1, + "Trust the container label %s in container property [suggested].", + ccr->ccr_label_prop != NULL ? + (char *)ccr->ccr_label_prop->dpp_entries[0].dpe_str : "(null)"); + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + + D_ASSERT(chk_engine_cont_cs_label_empty(ccr)); + + option_nr = 2; + sgl.sg_nr = 2; + } + + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? 
CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + cru.cru_cont = (uuid_t *)&ccr->ccr_uuid; + cru.cru_cont_label = label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check engine detects inconsistent container label: %s (CS) vs %s (property).\n", + daos_iov_empty(&ccr->ccr_label_cs) ? "(null)" : (char *)ccr->ccr_label_cs.iov_buf, + ccr->ccr_label_prop != NULL ? (char *)ccr->ccr_label_prop->dpp_entries[0].dpe_str : + "(null)"); + cru.cru_msg = msg; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_engine_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" detects inconsistent container label for "DF_UUIDF"/"DF_UUIDF + ": %s vs %s, action %u (%s), handle_rc %d, report_rc %d, decision %d\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid), + daos_iov_empty(&ccr->ccr_label_cs) ? "(null)" : (char *)ccr->ccr_label_cs.iov_buf, + ccr->ccr_label_prop != NULL ? (char *)ccr->ccr_label_prop->dpp_entries[0].dpe_str : + "(null)", act, option_nr ? "need interact" : "no interact", result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + +ignore: + default: + D_ERROR(DF_ENGINE" got invalid decision %d for inconsistent container label for " + DF_UUIDF"/"DF_UUIDF". Ignore the inconsistency.\n", + DP_ENGINE(ins), decision, DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid)); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. 
+ */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + if (chk_engine_cont_cs_label_empty(ccr)) + goto ignore; + + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + label = chk_engine_ccr2label(ccr, false); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + prop_tmp = chk_engine_build_label_prop(&ccr->ccr_label_cs); + if (prop_tmp == NULL) + D_GOTO(out, result = -DER_NOMEM); + + result = ds_cont_set_label(svc, ccr->ccr_uuid, prop_tmp, + ccr->ccr_label_prop, false); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET: + if (chk_engine_cont_target_label_empty(ccr)) + goto ignore; + + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_TARGET; + label = chk_engine_ccr2label(ccr, true); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + if (!daos_iov_empty(&ccr->ccr_label_cs)) { + prop_tmp = chk_engine_build_label_prop(&ccr->ccr_label_cs); + if (prop_tmp == NULL) + D_GOTO(out, result = -DER_NOMEM); + } + + result = ds_cont_set_label(svc, ccr->ccr_uuid, + ccr->ccr_label_prop, prop_tmp, true); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + } + + goto report; + +out: + /* + * It is not fatal even if failed to repair inconsistent container label, + * then do not skip current container for subsequent DAOS check. 
+ */ + + daos_prop_free(prop_tmp); + + return chk_engine_post_repair(cpr, &result, rc <= 0); +} + +static int +chk_engine_cont_label_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +{ + struct chk_cont_label_cb_args *cclca = arg; + struct chk_cont_rec *ccr; + d_iov_t kiov; + d_iov_t riov; + int rc = 0; + + /* key is the label prop from CS::cs_uuids, must not be empty. */ + D_ASSERT(key != NULL); + D_ASSERT(key->iov_buf != NULL); + + d_iov_set(&kiov, val->iov_buf, val->iov_len); + d_iov_set(&riov, NULL, 0); + rc = dbtree_lookup(cclca->cclca_aggregator->ccla_toh, &kiov, &riov); + if (rc != 0) + /* + * For -DER_NONEXIST: the container only exists in the container service RDB, + * but not on any pool shard yet. It will be created on related pool shards when + * it is opened next time. Any other lookup error is kept in rc. + */ + D_GOTO(out, rc = (rc == -DER_NONEXIST ? 0 : rc)); + + ccr = riov.iov_buf; + if (ccr->ccr_label_prop == NULL || + strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, + DAOS_PROP_LABEL_MAX_LEN) != 0) + rc = daos_iov_copy(&ccr->ccr_label_cs, key); + else + ccr->ccr_label_checked = 1; + +out: + if (!(cclca->cclca_cpr->cpr_ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) + rc = 0; + + return rc; +} + +static int +chk_engine_cont_cleanup(struct chk_pool_rec *cpr, struct ds_pool_svc *ds_svc, + struct chk_cont_list_aggregator *aggregator) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct cont_svc *svc; + struct chk_cont_rec *ccr; + struct chk_cont_label_cb_args cclca = { 0 }; + int rc = 0; + bool failout; + + if (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) + failout = true; + else + failout = false; + svc = ds_pool_ps2cs(ds_svc); + + d_list_for_each_entry(ccr, &aggregator->ccla_list, ccr_link) { + rc = ds_cont_existence_check(svc, ccr->ccr_uuid, &ccr->ccr_label_prop); + if (rc == 0) + continue; + + if (rc != -DER_NONEXIST) { + D_CDEBUG(failout, DLOG_ERR, DLOG_DBG, + DF_ENGINE" on rank %u failed to check container " + DF_UUIDF"/"DF_UUIDF": "DF_RC"\n", DP_ENGINE(ins), 
+ dss_self_rank(), DP_UUID(cpr->cpr_uuid), + DP_UUID(ccr->ccr_uuid), DP_RC(rc)); + + if (failout) + goto out; + + ccr->ccr_skip = 1; + continue; + } + + rc = chk_engine_cont_orphan(cpr, ccr, svc); + if (rc != 0) + goto out; + } + + cclca.cclca_aggregator = aggregator; + cclca.cclca_svc = svc; + cclca.cclca_cpr = cpr; + rc = ds_cont_iterate_labels(svc, chk_engine_cont_label_cb, &cclca); + if (rc != 0) + goto out; + + d_list_for_each_entry(ccr, &aggregator->ccla_list, ccr_link) { + if (!ccr->ccr_skip && !ccr->ccr_label_checked) { + rc = chk_engine_cont_set_label(cpr, ccr, svc); + if (rc != 0) + goto out; + } + } + +out: + return rc; +} + +static void +chk_engine_pool_notify(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins =cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_iv iv = { 0 }; + int rc; + + iv.ci_gen = cbk->cb_gen; + uuid_copy(iv.ci_uuid, cpr->cpr_uuid); + iv.ci_phase = cbk->cb_phase; + iv.ci_ins_status = ins->ci_bk.cb_ins_status; + iv.ci_pool_status = cbk->cb_pool_status; + iv.ci_from_psl = 1; + + if (!DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_ENGINE)) { + /* + * Synchronously notify the pool shards with the new check status/phase. + * Because some engines may not be in the (refreshed) pool map, we will + * use ins->ci_iv_ns instead of pool->sp_iv_ns to send the notification + * to all engines. Otherwise, the engine out of the pool map cannot get + * the notification. + */ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER, + true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u notify pool shards for "DF_UUIDF", phase %u, " + "ins_status %u, pool_status %u: rc = %d\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(cpr->cpr_uuid), iv.ci_phase, + iv.ci_ins_status, iv.ci_pool_status, rc); + } + + if (!DAOS_FAIL_CHECK(DAOS_CHK_PS_NOTIFY_LEADER)) { + iv.ci_from_psl = 0; + iv.ci_to_leader = 1; + /* Synchronously notify the check leader with the new check status/phase. 
*/ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, + CRT_IV_SYNC_NONE, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u notify check leader for "DF_UUIDF", phase %u, " + "ins_status %u, pool_status %u: rc = %d\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(cpr->cpr_uuid), + iv.ci_phase, iv.ci_ins_status, iv.ci_pool_status, rc); + } +} + +static void +chk_engine_pool_ult(void *args) +{ + struct chk_pool_mbs_args *cpma = args; + struct chk_pool_rec *cpr = cpma->cpma_cpr; + struct ds_pool_svc *svc = cpma->cpma_svc; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct pool_map *map = NULL; + struct chk_cont_list_aggregator aggregator = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + int i; + int rc = 0; + int rc1 = 0; + int rc2 = 0; + bool update = true; + + D_ASSERT(svc != NULL); + D_ASSERT(cpr != NULL); + D_ASSERT(cpr->cpr_mbs != NULL); + D_ASSERTF(cbk->cb_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS, + "Invalid check phase %u for pool "DF_UUIDF"\n", + cbk->cb_phase, DP_UUID(cpr->cpr_uuid)); + + D_INFO(DF_ENGINE" pool ult enter for "DF_UUIDF"\n", DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid)); + + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + + if (cpr->cpr_stop) + goto out; + + if (cbk->cb_phase > CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP) + goto cont; + + if (cbk->cb_phase < CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP) { + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP; + chk_engine_pool_notify(cpr); + /* QUEST: How to estimate the left time? 
*/ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0 || cpr->cpr_stop) + goto out; + } + + rc = ds_pool_svc_load_map(svc, &map); + if (rc != 0) + goto out; + + for (i = 0; i < cpr->cpr_shard_nr && !cpr->cpr_stop; i++) { + rc = chk_engine_pool_mbs_one(cpr, map, &cpr->cpr_mbs[i]); + if (rc != 0 || cpr->cpr_skip || cpr->cpr_stop) + goto out; + } + + /* Lookup for dangling entry in the pool map. */ + rc = chk_engine_find_dangling_pm(cpr, map); + if (rc != 0 || cpr->cpr_skip || cpr->cpr_stop) + goto out; + + if (cpr->cpr_map_refreshed) { + /* + * Under dryrun mode, we cannot make the changed pool map to be used by + * subsequent check, then have to skip it. + */ + if (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cpr->cpr_skip = 1; + goto out; + } + + /* + * Flush the pool map to persistent storage and + * distribute the pool map to other pool shards. + */ + rc = ds_pool_svc_flush_map(svc, map); + if (rc != 0 || cpr->cpr_skip || cpr->cpr_stop) + goto out; + } + + if (cpr->cpr_delay_label) { + rc = chk_engine_bad_pool_label(cpr, svc); + if (rc != 0 || cpr->cpr_skip || cpr->cpr_stop) + goto out; + + if (DAOS_FAIL_CHECK(DAOS_CHK_LEADER_BLOCK)) { + while (!cpr->cpr_stop) + dss_sleep(300); + goto out; + } + } + + /* + * Cleanup all old connections. It is no matter even if we cannot evict some + * old connections. That is also independent from former check phases result. + */ + ds_pool_svc_evict_all(svc); + + if (cpr->cpr_stop) + goto out; + +cont: + if (cbk->cb_phase < CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST) { + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST; + chk_engine_pool_notify(cpr); + /* QUEST: How to estimate the left time? 
*/ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0 || cpr->cpr_stop) + goto out; + } + + if (unlikely(cbk->cb_phase > CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP)) + goto out; + + rc = chk_engine_cont_list_init(cpr->cpr_uuid, &aggregator); + if (rc != 0) + goto out; + + /* Collect containers from pool shards. */ + rc = chk_cont_list_remote(ds_pool_svc2pool(svc), cbk->cb_gen, + chk_engine_cont_list_remote_cb, &aggregator); + if (rc != 0 || cpr->cpr_stop) + goto out; + + if (cbk->cb_phase < CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP) { + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP; + chk_engine_pool_notify(cpr); + /* QUEST: How to estimate the left time? */ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0 || cpr->cpr_stop) + goto out; + } + + rc = chk_engine_cont_cleanup(cpr, svc, &aggregator); + if (rc != 0) + goto out; + + rc = ds_pool_svc_schedule_reconf(svc); + +out: + chk_engine_cont_list_fini(&aggregator); + if (map != NULL) + pool_map_decref(map); + + /* + * If someone wants to stop (cpr_stop) the pool ULT, then it needs to + * update related pool bookmark and notify other pool shards by itself. + */ + if (!cpr->cpr_stop) { + if (rc != 0) { + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; + } else { + /* + * This may be caused by former chk_engine_pool_notify() failed to + * notify some other check engine(s) and the leader about the pool + * status. It will be synced this time. 
+ */ + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + if (likely(cbk->cb_phase != CHK__CHECK_SCAN_PHASE__CSP_DONE)) + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + else + update = false; + } + chk_engine_pool_notify(cpr); + cbk->cb_time.ct_stop_time = time(NULL); + if (likely(update)) + rc1 = chk_bk_update_pool(cbk, uuid_str); + + if (cbk->cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_CHECKED && + !cpr->cpr_not_export_ps) { + chk_pool_start_svc(cpr, &rc2); + if (cpr->cpr_started && cpr->cpr_start_post) + /* + * The pool may have been marked as non-connectable before + * corruption, re-enable it to allow new connection. + */ + rc2 = ds_pool_mark_connectable(svc); + } + } + + D_CDEBUG(rc != 0 || rc1 != 0 || rc2 != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u exit pool ULT for "DF_UUIDF" with %s stop: %d/%d/%d\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(cpr->cpr_uuid), + cpr->cpr_stop ? "external" : "self", rc, rc1, rc2); + + ds_pool_svc_put_leader(svc); + cpr->cpr_done = 1; + if (ins->ci_sched_running && !ins->ci_sched_exiting && + (cbk->cb_pool_status != CHK__CHECK_POOL_STATUS__CPS_CHECKED || cpr->cpr_not_export_ps)) + d_list_add_tail(&cpr->cpr_shutdown_link, &ins->ci_pool_shutdown_list); + else + chk_pool_put(cpr); + D_FREE(cpma); +} + +static void +chk_engine_sched(void *args) +{ + struct chk_instance *ins = args; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pool_rec *cpr; + uint32_t ins_phase; + uint32_t ins_status; + uint32_t pool_status; + d_rank_t myrank = dss_self_rank(); + int done = 0; + int rc = 0; + + D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n", + DP_ENGINE(ins), myrank, cbk->cb_phase); + + while (!ins->ci_sched_exiting) { + dss_sleep(300); + + /* Someone wants to stop the check. */ + if (ins->ci_sched_exiting) + D_GOTO(out, rc = 0); + + ins_phase = chk_pools_find_slowest(ins, &done); + + /* + * Check @done before updating cb_phase. 
Otherwise, the cb_phase may have become 'DONE' + * but cb_ins_status is still 'RUNNING'. + */ + if (done != 0) { + if (done > 0) { + D_INFO(DF_ENGINE" on rank %u has done\n", DP_ENGINE(ins), myrank); + rc = 1; + } else { + D_INFO(DF_ENGINE" on rank %u is stopped\n", DP_ENGINE(ins), myrank); + rc = 0; + } + + D_GOTO(out, rc); + } + + if (ins_phase > cbk->cb_phase) { + D_INFO(DF_ENGINE" on rank %u moves from phase %u to phase %u\n", + DP_ENGINE(ins), myrank, cbk->cb_phase, ins_phase); + + cbk->cb_phase = ins_phase; + /* QUEST: How to estimate the left time? */ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_engine(cbk); + if (rc != 0) + goto out; + } + + while ((cpr = d_list_pop_entry(&ins->ci_pool_shutdown_list, struct chk_pool_rec, + cpr_shutdown_link)) != NULL) { + chk_pool_shutdown(cpr, false); + chk_pool_put(cpr); + } + } + +out: + ins_phase = CHK_INVAL_PHASE; + if (rc > 0) { + /* + * If failed to check some pool(s), then the engine will be marked as 'failed'. + * It means that there is at least one failure during DAOS check on this engine. + * pool_status is useless under this case since all pools have done. 
+ */ + if (ins->ci_slowest_fail_phase != CHK_INVAL_PHASE && + ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) { + ins_phase = ins->ci_slowest_fail_phase; + ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } else { + ins_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + ins_status = CHK__CHECK_INST_STATUS__CIS_COMPLETED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + } + } else if (rc == 0) { + if (ins->ci_implicated) { + ins_status = CHK__CHECK_INST_STATUS__CIS_IMPLICATED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } else if (ins->ci_stopping) { + ins_status = CHK__CHECK_INST_STATUS__CIS_STOPPED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_STOPPED; + } else { + ins_status = CHK__CHECK_INST_STATUS__CIS_PAUSED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_PAUSED; + } + } else { + ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } + + /* The pool scan ULTs will be terminated via chk_engine_exit(). */ + chk_engine_exit(ins, ins_phase, ins_status, pool_status); + + D_INFO(DF_ENGINE" scheduler on rank %u exit at phase %u with status %u: rc %d\n", + DP_ENGINE(ins), myrank, cbk->cb_phase, ins_status, rc); + + ins->ci_sched_running = 0; +} + +static int +chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, + uuid_t pools[], uint64_t gen, int phase, uint32_t api_flags, + d_rank_t leader, uint32_t flags) +{ + struct chk_traverse_pools_args ctpa = { 0 }; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_property *prop = &ins->ci_prop; + d_rank_list_t *rank_list = NULL; + uint32_t cbk_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + int rc = 0; + + /* Check leader has already verified related parameters, trust them. 
*/ + + ins->ci_start_flags = flags; + + if (chk_is_on_leader(gen, leader, true)) { + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = NULL; + rc = chk_prop_fetch(prop, &ins->ci_ranks); + if (rc != 0) + goto out; + } else { + rank_list = uint32_array_to_rank_list(ranks, rank_nr); + if (rank_list == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + d_rank_list_sort(rank_list); + } + + if (ins->ci_start_flags & CSF_RESET_ALL) + goto reset; + + if (pool_nr > 0) { + rc = chk_pools_load_list(ins, gen, api_flags, pool_nr, pools, &cbk_phase); + if (rc != 0) + goto out; + } else { + ctpa.ctpa_ins = ins; + ctpa.ctpa_gen = gen; + ctpa.ctpa_phase = cbk_phase; + rc = chk_traverse_pools(chk_pools_load_from_db, &ctpa); + if (rc != 0) + goto out; + + cbk_phase = ctpa.ctpa_phase; + } + + if (d_list_empty(&ins->ci_pool_list) && !(api_flags & CHK__CHECK_FLAG__CF_ORPHAN_POOL)) + D_GOTO(out, rc = 1); + + goto init; + +reset: + ctpa.ctpa_ins = ins; + ctpa.ctpa_gen = gen; + rc = chk_traverse_pools(chk_pools_cleanup_cb, &ctpa); + if (rc != 0) + goto out; + + if (pool_nr > 0) { + rc = chk_pools_load_list(ins, gen, api_flags, pool_nr, pools, NULL); + if (rc != 0) + goto out; + } else { + rc = ds_mgmt_tgt_pool_iterate(chk_pools_add_from_dir, &ctpa); + if (rc != 0) + goto out; + + rc = ds_mgmt_newborn_pool_iterate(chk_pools_add_from_dir, &ctpa); + if (rc != 0) + goto out; + + rc = ds_mgmt_zombie_pool_iterate(chk_pools_add_from_dir, &ctpa); + if (rc != 0) + goto out; + } + + memset(cbk, 0, sizeof(*cbk)); + cbk->cb_magic = CHK_BK_MAGIC_ENGINE; + cbk->cb_version = DAOS_CHK_VERSION; + cbk_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + +init: + if (!chk_is_on_leader(gen, leader, true)) { + rc = chk_prop_prepare(leader, api_flags, phase, policy_nr, policies, rank_list, + prop); + if (rc != 0) + goto out; + + if (rank_list != NULL) { + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = rank_list; + rank_list = NULL; + } + } + + /* The engine bookmark will be stored via chk_engine_start_post() later. 
*/ + if (cbk->cb_phase > cbk_phase) + cbk->cb_phase = cbk_phase; + + cbk->cb_gen = gen; + if (api_flags & CHK__CHECK_FLAG__CF_RESET && !(ins->ci_start_flags & CSF_RESET_ALL)) { + memset(&cbk->cb_statistics, 0, sizeof(cbk->cb_statistics)); + memset(&cbk->cb_time, 0, sizeof(cbk->cb_time)); + } + + ins->ci_slowest_fail_phase = CHK_INVAL_PHASE; + +out: + d_rank_list_free(rank_list); + if (rc < 0) { + /* Reset ci_ranks if hit failure, then we can reload when use it next time. */ + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = NULL; + } + + return rc; +} + +static int +chk_engine_start_post(struct chk_instance *ins) +{ + struct chk_pool_rec *cpr; + struct chk_bookmark *ins_cbk = &ins->ci_bk; + struct chk_bookmark *pool_cbk; + char uuid_str[DAOS_UUID_STR_SIZE]; + uint32_t phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + int rc = 0; + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + pool_cbk = &cpr->cpr_bk; + + if (pool_cbk->cb_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE) + continue; + + if (phase > pool_cbk->cb_phase) + phase = pool_cbk->cb_phase; + + pool_cbk->cb_gen = ins_cbk->cb_gen; + pool_cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + /* Always refresh the start time. */ + pool_cbk->cb_time.ct_start_time = time(NULL); + /* QUEST: How to estimate the left time? */ + pool_cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - + pool_cbk->cb_phase; + + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(pool_cbk, uuid_str); + if (rc != 0) + break; + } + + if (rc == 0) { + /* + * The phase may be CHK__CHECK_SCAN_PHASE__CSP_DONE, it is fine. + * + * The phase in engine bookmark may be larger than the phase in + * some pools that may be new added into current check instance. + * So we allow the phase to backward. + */ + ins_cbk->cb_phase = phase; + ins_cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_RUNNING; + /* Always refresh the start time. 
*/ + ins_cbk->cb_time.ct_start_time = time(NULL); + /* QUEST: How to estimate the left time? */ + ins_cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - + ins_cbk->cb_phase; + rc = chk_bk_update_engine(ins_cbk); + if (rc == 0) { + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) + /* Shutdown former instance left opened pool. */ + chk_pool_shutdown(cpr, false); + } + } + + return rc; +} + +static int +chk_engine_pool_filter(uuid_t uuid, void *arg, int *phase) +{ + struct chk_instance *ins = arg; + struct chk_pool_rec *cpr; + d_iov_t kiov; + d_iov_t riov; + int rc; + + D_ASSERT(ins != NULL); + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, uuid, sizeof(uuid_t)); + + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc == 0) { + cpr = (struct chk_pool_rec *)riov.iov_buf; + *phase = cpr->cpr_bk.cb_phase; + D_ASSERT(*phase >= 0); + } else if (rc == -DER_NONEXIST && ins->ci_start_flags & CSF_ORPHAN_POOL) { + *phase = CHK_INVAL_PHASE; + rc = 0; + } + + return rc; +} + +int +chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, + struct chk_policy *policies, int pool_nr, uuid_t pools[], uint32_t api_flags, + int phase, d_rank_t leader, uint32_t flags, uuid_t iv_uuid, + struct ds_pool_clues *clues) +{ + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct umem_attr uma = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_rank_t myrank = dss_self_rank(); + int rc; + int rc1; + + rc = chk_ins_can_start(ins); + if (rc != 0) + goto out_log; + + ins->ci_starting = 1; + ins->ci_started = 0; + ins->ci_start_flags = 0; + ins->ci_for_orphan = 0; + ins->ci_orphan_done = 0; + ins->ci_implicated = 0; + ins->ci_pool_stopped = 0; + + D_ASSERT(daos_handle_is_inval(ins->ci_pool_hdl)); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); + + D_ASSERT(daos_handle_is_inval(ins->ci_pending_hdl)); + + if (ins->ci_sched != ABT_THREAD_NULL) + ABT_thread_free(&ins->ci_sched); + + 
chk_iv_ns_cleanup(&ins->ci_iv_ns); + + if (ins->ci_iv_group != NULL) { + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } + + uma.uma_id = UMEM_CLASS_VMEM; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_POOL, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pool_btr, &ins->ci_pool_hdl); + if (rc != 0) + goto out_tree; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_PA, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pending_btr, &ins->ci_pending_hdl); + if (rc != 0) + goto out_tree; + + rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies, + pool_nr, pools, gen, phase, api_flags, leader, flags); + if (rc != 0) + goto out_tree; + + if (chk_is_on_leader(gen, leader, true)) { + ins->ci_iv_ns = chk_leader_get_iv_ns(); + if (unlikely(ins->ci_iv_ns == NULL)) + goto out_tree; + } else { + uuid_unparse_lower(iv_uuid, uuid_str); + rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); + if (rc != 0) + goto out_tree; + + rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, iv_uuid, ins->ci_iv_group, + &ins->ci_iv_id, &ins->ci_iv_ns); + if (rc != 0) + goto out_group; + + ds_iv_ns_update(ins->ci_iv_ns, leader, ins->ci_iv_ns->iv_master_term + 1); + } + + uuid_copy(cbk->cb_iv_uuid, iv_uuid); + rc = chk_engine_start_post(ins); + if (rc != 0) + goto out_stop; + + rc = ds_pool_clues_init(chk_engine_pool_filter, ins, clues); + if (rc != 0) + goto out_stop; + + ins->ci_sched_running = 1; + + rc = dss_ult_create(chk_engine_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, + &ins->ci_sched); + if (rc != 0) { + ins->ci_sched_running = 0; + goto out_stop; + } + + goto out_done; + +out_stop: + chk_pool_stop_all(ins, CHK__CHECK_POOL_STATUS__CPS_IMPLICATED, NULL); + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + cbk->cb_time.ct_stop_time = time(NULL); + cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + rc1 = chk_bk_update_engine(cbk); + if (rc1 != 0) + D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n", + 
DP_ENGINE(ins), DP_RC(rc1)); + } + chk_iv_ns_cleanup(&ins->ci_iv_ns); +out_group: + if (ins->ci_iv_group != NULL) { + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } +out_tree: + chk_destroy_pending_tree(ins); + chk_destroy_pool_tree(ins); +out_done: + ins->ci_starting = 0; +out_log: + if (rc >= 0) { + D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, phase %d, leader %u, " + "flags %x, iv "DF_UUIDF": rc %d\n", + DP_ENGINE(ins), chk_is_ins_reset(ins, api_flags) ? "start" : "resume", + myrank, api_flags, phase, leader, flags, DP_UUID(iv_uuid), rc); + + chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks); + chk_pools_dump(&ins->ci_pool_list, pool_nr, pools); + } else { + D_ERROR(DF_ENGINE" failed to start on rank %u with %d pools, api_flags %x, " + "phase %d, leader %u, flags %x, gen "DF_X64", iv "DF_UUIDF": "DF_RC"\n", + DP_ENGINE(ins), myrank, pool_nr, api_flags, phase, leader, flags, gen, + DP_UUID(iv_uuid), DP_RC(rc)); + } + + return rc; +} + +int +chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) +{ + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pool_rec *cpr; + d_rank_t myrank = dss_self_rank(); + int rc = 0; + int i; + int active = false; + + if (gen != 0 && gen != cbk->cb_gen) + D_GOTO(log, rc = -DER_NOTAPPLICABLE); + + if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE) + D_GOTO(log, rc = -DER_NOTAPPLICABLE); + + if (ins->ci_starting) + D_GOTO(log, rc = -DER_BUSY); + + if (ins->ci_stopping || ins->ci_sched_exiting) + D_GOTO(log, rc = -DER_INPROGRESS); + + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(log, rc = -DER_ALREADY); + + ins->ci_stopping = 1; + + D_INFO(DF_ENGINE" stopping on rank %u with %d pools\n", DP_ENGINE(ins), myrank, pool_nr); + + if (pool_nr == 0) { + chk_pool_stop_all(ins, CHK__CHECK_POOL_STATUS__CPS_STOPPED, &rc); + if (rc != 0) + D_GOTO(out, rc); + } else { + for (i = 0; i < pool_nr; i++) { + 
chk_pool_stop_one(ins, pools[i], CHK__CHECK_POOL_STATUS__CPS_STOPPED, + CHK_INVAL_PHASE, &rc); + if (rc != 0) + D_GOTO(out, rc); + } + } + + if (ins->ci_pool_stopped) + *flags = CSF_POOL_STOPPED; + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (!cpr->cpr_done && !cpr->cpr_skip && !cpr->cpr_stop) { + D_ASSERTF(pool_nr != 0, "Hit active pool "DF_UUIDF" after stop all\n", + DP_UUID(cpr->cpr_uuid)); + + active = true; + break; + } + } + + if (!active) { + chk_stop_sched(ins); + /* To indicate that there is no active pool(s) on this rank. */ + rc = 1; + } + +out: + ins->ci_pool_stopped = 0; + ins->ci_stopping = 0; +log: + if (rc >= 0 || rc == -DER_ALREADY) { + D_INFO(DF_ENGINE" stopped on rank %u with %d pools: rc %d\n", DP_ENGINE(ins), + myrank, pool_nr, rc); + + chk_pools_dump(NULL, pool_nr, pools); + if (rc == -DER_ALREADY) + rc = 1; + } else { + D_ERROR(DF_ENGINE" failed to stop on rank %u with %d pools, " + "gen "DF_X64": "DF_RC"\n", DP_ENGINE(ins), myrank, pool_nr, gen, DP_RC(rc)); + } + + return rc; +} + +/* Query one pool shard on one xstream. */ +static int +chk_engine_query_one(void *args) +{ + struct chk_query_pool_shard *shard = args; + struct chk_query_target *target; + char *path = NULL; + daos_handle_t poh = DAOS_HDL_INVAL; + vos_pool_info_t info; + int tid = dss_get_module_info()->dmi_tgt_id; + int rc; + + target = &shard->cqps_targets[tid]; + target->cqt_rank = dss_self_rank(); + target->cqt_tgt = tid; + + rc = ds_mgmt_tgt_pool_exist(shard->cqps_uuid, &path); + /* We allow the target nonexist. 
*/ + if (rc <= 0) + goto out; + + rc = vos_pool_open(path, shard->cqps_uuid, VOS_POF_FOR_CHECK_QUERY, &poh); + if (rc != 0) { + D_ERROR("Failed to open vos pool "DF_UUIDF" on target %u/%d: "DF_RC"\n", + DP_UUID(shard->cqps_uuid), dss_self_rank(), tid, DP_RC(rc)); + goto out; + } + + rc = vos_pool_query(poh, &info); + if (rc != 0) { + D_ERROR("Failed to query vos pool "DF_UUIDF" on target %u/%d: "DF_RC"\n", + DP_UUID(shard->cqps_uuid), dss_self_rank(), tid, DP_RC(rc)); + goto out; + } + + target->cqt_ins_status = info.pif_chk.cpi_ins_status; + target->cqt_statistics = info.pif_chk.cpi_statistics; + target->cqt_time = info.pif_chk.cpi_time; + +out: + if (daos_handle_is_valid(poh)) + vos_pool_close(poh); + D_FREE(path); + return rc; +} + +static int +chk_engine_query_pool(uuid_t uuid, void *args) +{ + struct chk_query_pool_args *cqpa = args; + struct chk_query_pool_shard *shard; + struct chk_query_pool_shard *new_shards; + struct chk_bookmark cbk; + struct dss_coll_args coll_args = { 0 }; + struct dss_coll_ops coll_ops = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc = 0; + + if (cqpa->cqpa_idx == cqpa->cqpa_cap) { + D_REALLOC_ARRAY(new_shards, cqpa->cqpa_shards, cqpa->cqpa_cap, cqpa->cqpa_cap << 1); + if (new_shards == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + cqpa->cqpa_shards = new_shards; + cqpa->cqpa_cap <<= 1; + } + + shard = &cqpa->cqpa_shards[cqpa->cqpa_idx++]; + uuid_copy(shard->cqps_uuid, uuid); + shard->cqps_rank = dss_self_rank(); + + uuid_unparse_lower(uuid, uuid_str); + rc = chk_bk_fetch_pool(&cbk, uuid_str); + if (rc == -DER_NONEXIST) { + shard->cqps_status = CHK__CHECK_POOL_STATUS__CPS_UNCHECKED; + shard->cqps_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + memset(&shard->cqps_statistics, 0, sizeof(shard->cqps_statistics)); + memset(&shard->cqps_time, 0, sizeof(shard->cqps_time)); + shard->cqps_target_nr = 0; + shard->cqps_targets = NULL; + + D_GOTO(out, rc = 0); + } + + if (rc != 0) + goto out; + + D_ALLOC_ARRAY(shard->cqps_targets, dss_tgt_nr); + 
if (shard->cqps_targets == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + shard->cqps_status = cbk.cb_pool_status; + shard->cqps_phase = cbk.cb_phase; + memcpy(&shard->cqps_statistics, &cbk.cb_statistics, sizeof(shard->cqps_statistics)); + memcpy(&shard->cqps_time, &cbk.cb_time, sizeof(shard->cqps_time)); + shard->cqps_target_nr = dss_tgt_nr; + + coll_ops.co_func = chk_engine_query_one; + coll_args.ca_func_args = shard; + + rc = dss_thread_collective_reduce(&coll_ops, &coll_args, 0); + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, + DF_ENGINE" on rank %u query pool "DF_UUIDF": "DF_RC"\n", + DP_ENGINE(cqpa->cqpa_ins), dss_self_rank(), DP_UUID(uuid), DP_RC(rc)); + return rc; +} + +int +chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status, + uint32_t *ins_phase, uint32_t *shard_nr, struct chk_query_pool_shard **shards, + uint64_t *l_gen) +{ + struct chk_instance *ins = chk_engine; + struct chk_query_pool_args cqpa = { 0 }; + int rc = 0; + int i; + + /* + * We will support to check query from new check leader under the case of old leader + * crashed, that may have different check generation. So do not check "cb_gen" here, + * instead, current engine's "cb_gen" will be returned to leader to indicate whether + * it is new leader or not. 
+ */ + + cqpa.cqpa_ins = ins; + cqpa.cqpa_cap = 2; + cqpa.cqpa_idx = 0; + D_ALLOC_ARRAY(cqpa.cqpa_shards, cqpa.cqpa_cap); + if (cqpa.cqpa_shards == NULL) + D_GOTO(log, rc = -DER_NOMEM); + + if (pool_nr == 0) { + rc = ds_mgmt_tgt_pool_iterate(chk_engine_query_pool, &cqpa); + } else { + for (i = 0; i < pool_nr; i++) { + rc = chk_engine_query_pool(pools[i], &cqpa); + if (rc != 0) + goto log; + } + } + +log: + if (rc != 0) { + chk_query_free(cqpa.cqpa_shards, cqpa.cqpa_idx); + } else { + *ins_status = ins->ci_bk.cb_ins_status; + *ins_phase = ins->ci_bk.cb_phase; + *shards = cqpa.cqpa_shards; + *shard_nr = cqpa.cqpa_idx; + *l_gen = ins->ci_bk.cb_gen; + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, + DF_ENGINE" on rank %u handle query with gen "DF_X64" for %d pools, status %u, " + "phase %u: "DF_RC"\n", DP_ENGINE(ins), dss_self_rank(), gen, pool_nr, + ins->ci_bk.cb_ins_status, ins->ci_bk.cb_phase, DP_RC(rc)); + + return rc; +} + +int +chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) +{ + struct chk_instance *ins = chk_engine; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + d_rank_list_t *rank_list = NULL; + int rc = 0; + + if (cbk->cb_gen != gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + rc = chk_prop_fetch(prop, &rank_list); + if (rc != 0) + goto out; + + D_ASSERT(rank_list != NULL); + + /* For check engine on the leader, related rank has already been marked as "dead". */ + if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) + goto group; + + if (!chk_remove_rank_from_list(rank_list, rank)) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + prop->cp_rank_nr--; + rc = chk_prop_update(prop, rank_list); + if (rc != 0) + goto out; + + /* + * NOTE: If the rank dead before DAOS check start, then subsequent check start will + * get failure, and then the control plane needs to decide whether or not to + * exclude the dead rank from the system and re-run DAOS check. 
+ * + * If the rank dead at (or before) CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST, then + * related PS leader(s) will know that when list the containers, and then mark + * related pool(s) as 'failed'. + * + * If the rank dead after CHK__CHECK_SCAN_PHASE__CSP_CONT_LIST, then if such + * rank is not involved in subsequent DAOS check, then no effect for current + * check instance; otherwise, related pool(s) will be marked as 'failed' when + * try to access something on the dead rank. + * + * So here, it is not necessary to find out the affected pools and fail them + * immediately when the death event is reported, instead, it will be handled + * sometime later as the DAOS check going. + */ + +group: + if (ins->ci_iv_group != NULL) + rc = crt_group_secondary_modify(ins->ci_iv_group, rank_list, rank_list, + CRT_GROUP_MOD_OP_REPLACE, version); + +out: + if (rc == 0) { + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = rank_list; + rank_list = NULL; + } + + d_rank_list_free(rank_list); + if (rc != -DER_NOTAPPLICABLE) + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u mark rank %u as dead with gen " + DF_X64", version %u: "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), rank, gen, version, DP_RC(rc)); + + return rc; +} + +static int +chk_engine_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act, bool locked) +{ + struct chk_pending_rec *cpr = NULL; + int rc; + + rc = chk_pending_del(ins, seq, locked, &cpr); + if (rc == 0) { + /* The cpr will be destroyed by the waiter via chk_engine_report(). */ + D_ASSERT(cpr->cpr_busy); + + ABT_mutex_lock(cpr->cpr_mutex); + /* + * It is the control plane's duty to guarantee that the decision is a valid + * action from the report options. Otherwise, related inconsistency will be + * ignored. 
+ */ + cpr->cpr_action = act; + ABT_cond_broadcast(cpr->cpr_cond); + ABT_mutex_unlock(cpr->cpr_mutex); + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u takes action for seq "DF_X64" with action %u: %d\n", + DP_ENGINE(ins), dss_self_rank(), seq, act, rc); + + return rc; +} + +int +chk_engine_act(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, uint32_t flags) +{ + struct chk_instance *ins = chk_engine; + struct chk_property *prop = &ins->ci_prop; + struct chk_pool_rec *pool = NULL; + struct chk_pool_rec *pool_tmp = NULL; + struct chk_pending_rec *cpr = NULL; + struct chk_pending_rec *cpr_tmp = NULL; + int rc; + + if (ins->ci_bk.cb_gen != gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (unlikely(cla >= CHK_POLICY_MAX)) { + D_ERROR("Invalid DAOS inconsistency class %u\n", cla); + D_GOTO(out, rc = -DER_INVAL); + } + + /* The admin may input the wrong option, not acceptable. */ + if (unlikely(act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { + D_ERROR("%u is not acceptable for interaction decision.\n", cla); + D_GOTO(out, rc = -DER_INVAL); + } + + rc = chk_engine_act_internal(ins, seq, act, false); + if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) + rc = 0; + + if (rc != 0 || !(flags & CAF_FOR_ALL)) + goto out; + + if (likely(prop->cp_policies[cla] != act)) { + prop->cp_policies[cla] = act; + chk_prop_update(prop, NULL); + } + + /* + * Hold reference on each to guarantee that the next 'tmp' will not be unlinked from the + * pool list during current pool process. 
+ */ + d_list_for_each_entry(pool, &ins->ci_pool_list, cpr_link) + chk_pool_get(pool); + + d_list_for_each_entry_safe(pool, pool_tmp, &ins->ci_pool_list, cpr_link) { + if (rc == 0) { + ABT_rwlock_wrlock(ins->ci_abt_lock); + d_list_for_each_entry_safe(cpr, cpr_tmp, &pool->cpr_pending_list, + cpr_pool_link) { + if (cpr->cpr_class != cla || + cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) + continue; + + rc = chk_engine_act_internal(ins, cpr->cpr_seq, act, true); + if (rc != 0) + break; + } + ABT_rwlock_unlock(ins->ci_abt_lock); + } + chk_pool_put(pool); + } + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u takes action for seq " + DF_X64" with gen "DF_X64", class %u, action %u, flags %x: "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), seq, gen, cla, act, flags, DP_RC(rc)); + + return rc; +} + +static int +chk_engine_cont_list_local_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *data, unsigned int *acts) +{ + struct chk_cont_list_args *ccla = data; + uuid_t *new_array; + int rc = 0; + + if (ccla->ccla_idx >= ccla->ccla_cap) { + D_REALLOC_ARRAY(new_array, ccla->ccla_conts, ccla->ccla_cap, ccla->ccla_cap << 1); + if (new_array == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + ccla->ccla_conts = new_array; + ccla->ccla_cap <<= 1; + } + + uuid_copy(ccla->ccla_conts[ccla->ccla_idx++], entry->ie_couuid); + +out: + return rc; +} + +/* + * Enumerate the containers for one pool target. + * Different pool targets (on the same rank) may have different containers list. 
+ */ +static int +chk_engine_cont_list_one(void *args) +{ + struct dss_coll_stream_args *reduce = args; + struct dss_stream_arg_type *streams = reduce->csa_streams; + struct chk_cont_list_args *ccla; + struct ds_pool_child *pool; + vos_iter_param_t param = { 0 }; + struct vos_iter_anchors anchor = { 0 }; + int rc = 0; + + /* Each target uses the stream argument slot indexed by its own target ID. */ + ccla = streams[dss_get_module_info()->dmi_tgt_id].st_arg; + pool = ds_pool_child_lookup(ccla->ccla_pool); + /* non-exist pool is not fatal. */ + if (pool != NULL) { + param.ip_hdl = pool->spc_hdl; + rc = vos_iterate(&param, VOS_ITER_COUUID, false, &anchor, + chk_engine_cont_list_local_cb, NULL, ccla, NULL); + ds_pool_child_put(pool); + } + + return rc; +} + +/* Pass one stream's container UUID array and count to the aggregator's reduce helper. */ +static void +chk_engine_cont_list_reduce(void *a_args, void *s_args) +{ + struct chk_cont_list_args *ccla = s_args; + + chk_engine_cont_list_reduce_internal(a_args, ccla->ccla_conts, ccla->ccla_idx); +} + +/* + * Allocate the per-stream argument with an initial capacity of two UUIDs and copy the + * pool UUID from the aggregator into it. + */ +static int +chk_engine_cont_list_alloc(struct dss_stream_arg_type *args, void *a_arg) +{ + struct chk_cont_list_aggregator *aggregator = a_arg; + struct chk_cont_list_args *ccla; + int rc = 0; + + D_ALLOC_PTR(ccla); + if (ccla == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + ccla->ccla_cap = 2; + D_ALLOC_ARRAY(ccla->ccla_conts, ccla->ccla_cap); + if (ccla->ccla_conts == NULL) { + D_FREE(ccla); + D_GOTO(out, rc = -DER_NOMEM); + } + + uuid_copy(ccla->ccla_pool, aggregator->ccla_pool); + args->st_arg = ccla; + +out: + return rc; +} + +/* Release the per-stream argument together with its UUID array. */ +static void +chk_engine_cont_list_free(struct dss_stream_arg_type *args) +{ + struct chk_cont_list_args *ccla = args->st_arg; + + if (ccla != NULL) { + D_FREE(ccla->ccla_conts); + D_FREE(args->st_arg); + } +} + +int +chk_engine_cont_list(uint64_t gen, uuid_t pool_uuid, uuid_t **conts, uint32_t *count) +{ + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_cont_list_aggregator aggregator = { 0 }; + struct dss_coll_args coll_args = { 0 }; + struct dss_coll_ops coll_ops = { 0 }; + struct chk_cont_rec *ccr; + uuid_t *uuids; + int i 
= 0; + int rc = 0; + + if (cbk->cb_gen != gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + rc = chk_engine_cont_list_init(pool_uuid, &aggregator); + if (rc != 0) + goto out; + + coll_args.ca_func_args = &coll_args.ca_stream_args; + coll_args.ca_aggregator = &aggregator; + + coll_ops.co_func = chk_engine_cont_list_one; + coll_ops.co_reduce = chk_engine_cont_list_reduce; + coll_ops.co_reduce_arg_alloc = chk_engine_cont_list_alloc; + coll_ops.co_reduce_arg_free = chk_engine_cont_list_free; + + rc = ds_pool_task_collective_reduce(pool_uuid, + PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, + &coll_ops, &coll_args, 0); + +out: + if (rc == 0 && aggregator.ccla_count > 0) { + D_ALLOC_ARRAY(uuids, aggregator.ccla_count); + if (uuids == NULL) { + rc = -DER_NOMEM; + *conts = NULL; + *count = 0; + } else { + d_list_for_each_entry(ccr, &aggregator.ccla_list, ccr_link) + uuid_copy(uuids[i++], ccr->ccr_uuid); + + *conts = uuids; + *count = aggregator.ccla_count; + } + } else { + *conts = NULL; + *count = 0; + } + + chk_engine_cont_list_fini(&aggregator); + + return rc; +} + +int +chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags) +{ + struct chk_instance *ins = chk_engine; + struct chk_pool_rec *cpr = NULL; + struct chk_bookmark *cbk; + struct chk_bookmark new; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_iov_t riov; + d_iov_t kiov; + int rc; + + if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_SHUTDOWN); + + uuid_unparse_lower(uuid, uuid_str); + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0 && rc != -DER_NONEXIST) + goto out; + + if (rc == -DER_NONEXIST) { + if (!(flags & CPSF_FOR_ORPHAN)) + goto out; + + /* It is for orphan pool, add the chk_pool_rec. 
*/ + rc = ds_mgmt_pool_exist(uuid); + if (unlikely(rc == 0)) + D_GOTO(out, rc = -DER_NONEXIST); + + rc = chk_bk_fetch_pool(&new, uuid_str); + if (rc != 0 && rc != -DER_NONEXIST) + goto out; + + if (rc == -DER_NONEXIST) { + memset(&new, 0, sizeof(new)); + new.cb_magic = CHK_BK_MAGIC_POOL; + new.cb_version = DAOS_CHK_VERSION; + new.cb_gen = ins->ci_bk.cb_gen; + new.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + new.cb_time.ct_start_time = time(NULL); + } + + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), + &new, ins, NULL, NULL, NULL, &cpr); + if (rc != 0) + goto out; + } else { + cpr = (struct chk_pool_rec *)riov.iov_buf; + } + + if (cpr->cpr_stop) + D_GOTO(out, rc = -DER_SHUTDOWN); + + /* Maybe resent one. */ + if (unlikely(cpr->cpr_started)) + D_GOTO(out, rc = -DER_ALREADY); + + if (flags & CPSF_NOT_EXPORT_PS) + cpr->cpr_not_export_ps = 1; + + cbk = &cpr->cpr_bk; + chk_pool_get(cpr); + + rc = ds_pool_start(uuid); + if (rc != 0) + D_GOTO(put, rc = (rc == -DER_NONEXIST ? 1 : rc)); + + if (cbk->cb_phase < phase) { + cbk->cb_phase = cbk->cb_phase; + /* QUEST: How to estimate the left time? */ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0) { + ds_pool_stop(uuid); + goto put; + } + } + + cpr->cpr_started = 1; + +put: + if (rc != 0) { + if (rc > 0) { + chk_pool_stop_one(ins, uuid, CHK__CHECK_POOL_STATUS__CPS_CHECKED, + CHK__CHECK_SCAN_PHASE__CSP_DONE, NULL); + rc = 0; + } else { + chk_ins_set_fail(ins, cbk->cb_phase > phase ? 
cbk->cb_phase : phase); + chk_pool_stop_one(ins, uuid, CHK__CHECK_POOL_STATUS__CPS_FAILED, + CHK_INVAL_PHASE, NULL); + } + } + + chk_pool_put(cpr); + +out: + if (unlikely(rc == -DER_ALREADY)) + rc = 0; + else + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u start pool "DF_UUIDF" at phase %u: "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(uuid), phase, DP_RC(rc)); + + return rc; +} + +int +chk_engine_pool_mbs(uint64_t gen, uuid_t uuid, uint32_t phase, const char *label, uint64_t seq, + uint32_t flags, uint32_t mbs_nr, struct chk_pool_mbs *mbs_array, + struct rsvc_hint *hint) +{ + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk; + struct chk_pool_rec *cpr = NULL; + struct ds_pool_svc *svc = NULL; + struct chk_pool_mbs_args *cpma = NULL; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_iov_t riov; + d_iov_t kiov; + int rc; + int i; + + if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_SHUTDOWN); + + rc = ds_pool_svc_lookup_leader(uuid, &svc, hint); + if (rc != 0) + goto out; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0) + goto out; + + cpr = (struct chk_pool_rec *)riov.iov_buf; + cbk = &cpr->cpr_bk; + + if (cpr->cpr_stop) + D_GOTO(out, rc = -DER_SHUTDOWN); + + /* Maybe resent one. 
*/ + if (unlikely(cpr->cpr_mbs != NULL)) + D_GOTO(out, rc = 0); + + D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL); + chk_pool_get(cpr); + + D_ALLOC_ARRAY(cpr->cpr_mbs, mbs_nr); + if (cpr->cpr_mbs == NULL) + D_GOTO(put, rc = -DER_NOMEM); + + cpr->cpr_shard_nr = mbs_nr; + for (i = 0; i < mbs_nr; i++) { + D_ALLOC_ARRAY(cpr->cpr_mbs[i].cpm_tgt_status, mbs_array[i].cpm_tgt_nr); + if (cpr->cpr_mbs[i].cpm_tgt_status == NULL) + D_GOTO(put, rc = -DER_NOMEM); + + cpr->cpr_mbs[i].cpm_rank = mbs_array[i].cpm_rank; + cpr->cpr_mbs[i].cpm_tgt_nr = mbs_array[i].cpm_tgt_nr; + memcpy(cpr->cpr_mbs[i].cpm_tgt_status, mbs_array[i].cpm_tgt_status, + sizeof(*mbs_array[i].cpm_tgt_status) * mbs_array[i].cpm_tgt_nr); + } + + rc = chk_dup_string(&cpr->cpr_label, label, label != NULL ? strlen(label) : 0); + if (rc != 0) + goto put; + + cpr->cpr_label_seq = seq; + if (flags & CMF_REPAIR_LABEL) + cpr->cpr_delay_label = 1; + + if (cbk->cb_phase < phase) { + cbk->cb_phase = phase; + /* QUEST: How to estimate the left time? */ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0) + goto put; + } + + D_ALLOC_PTR(cpma); + if (cpma == NULL) + D_GOTO(put, rc = -DER_NOMEM); + + cpma->cpma_svc = svc; + cpma->cpma_cpr = cpr; + svc = NULL; + + rc = dss_ult_create(chk_engine_pool_ult, cpma, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, + &cpr->cpr_thread); + if (rc != 0) { + svc = cpma->cpma_svc; + D_FREE(cpma); + rc = dss_abterr2der(rc); + D_ERROR(DF_ENGINE" on rank %u failed to create ULT for pool "DF_UUIDF": "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(uuid), DP_RC(rc)); + } + +put: + if (rc != 0) { + for (i = 0; i < cpr->cpr_shard_nr; i++) + D_FREE(cpr->cpr_mbs[i].cpm_tgt_status); + D_FREE(cpr->cpr_mbs); + D_FREE(cpr->cpr_label); + cpr->cpr_shard_nr = 0; + cpr->cpr_delay_label = 0; + + chk_ins_set_fail(ins, cbk->cb_phase > phase ? 
cbk->cb_phase : phase); + chk_pool_stop_one(ins, uuid, CHK__CHECK_POOL_STATUS__CPS_FAILED, + CHK_INVAL_PHASE, NULL); + chk_pool_put(cpr); + } +out: + if (svc != NULL) + ds_pool_svc_put_leader(svc); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u set pool mbs "DF_UUIDF" at phase %u: "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), DP_UUID(uuid), phase, DP_RC(rc)); + + return rc; +} + +/* + * \return Positive value if interaction is interrupted, such as check stop. + * Zero on success. + * Negative value if error. + */ +static int +chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) +{ + struct chk_instance *ins = chk_engine; + struct chk_pending_rec *cpr = NULL; + struct chk_pending_rec *tmp = NULL; + struct chk_pool_rec *pool = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; + + D_ASSERT(cru->cru_pool != NULL); + + if (*seq == 0) { + +new_seq: + *seq = chk_report_seq_gen(ins); + } + + if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0) + goto log; + + pool = (struct chk_pool_rec *)riov.iov_buf; + + rc = chk_pending_add(ins, &pool->cpr_pending_list, NULL, *cru->cru_pool, *seq, + cru->cru_rank, cru->cru_cla, &cpr); + if (unlikely(rc == -DER_AGAIN)) + goto new_seq; + + if (rc != 0) + goto log; + } + + rc = chk_report_remote(ins->ci_prop.cp_leader, ins->ci_bk.cb_gen, cru->cru_cla, + cru->cru_act, cru->cru_result, cru->cru_rank, cru->cru_target, + cru->cru_pool, cru->cru_pool_label, cru->cru_cont, + cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, + cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, + cru->cru_detail_nr, cru->cru_details, *seq); + if (unlikely(rc == -DER_AGAIN)) { + D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT); + + rc = chk_pending_del(ins, *seq, false, &tmp); + if (rc == 0) + D_ASSERT(tmp == NULL); + else if (rc 
!= -DER_NONEXIST) + goto log; + + chk_pending_destroy(cpr); + cpr = NULL; + + goto new_seq; + } + + /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ + if (rc == 0 && pool != NULL && + likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; + +log: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u report with class %u, action %u, seq " + DF_X64", handle_rc %d, report_rc %d\n", DP_ENGINE(ins), + cru->cru_rank, cru->cru_cla, cru->cru_act, *seq, cru->cru_result, rc); + + if (rc != 0 || cpr == NULL) + goto out; + + D_ASSERT(cpr->cpr_busy); + + D_INFO(DF_ENGINE" on rank %u need interaction for class %u\n", + DP_ENGINE(ins), cru->cru_rank, cru->cru_cla); + + ABT_mutex_lock(cpr->cpr_mutex); + +again: + if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + *decision = cpr->cpr_action; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { + rc = 1; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex); + + goto again; + +out: + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + + if (cpr != NULL) + chk_pending_destroy(cpr); + + return rc; +} + +int +chk_engine_notify(struct chk_iv *iv) +{ + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pool_rec *cpr; + int rc = 0; + + if (cbk->cb_gen != iv->ci_gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (!uuid_is_null(iv->ci_uuid)) { + rc = chk_pool_handle_notify(ins, iv); + goto out; + } + + /* Pool service leader must specify the pool UUID when notify the pool shards. 
*/ + if (iv->ci_from_psl) + D_GOTO(out, rc = -DER_INVAL); + /* A phase at or beyond CSP_POOL_MBS is taken as the leader having finished the orphan pools. */ + if (iv->ci_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS) { + ins->ci_orphan_done = 1; + D_INFO(DF_ENGINE" leader completed orphan pools process\n", DP_ENGINE(ins)); + } + /* Instance-wide notification: act on the instance status carried by the leader. */ + switch (iv->ci_ins_status) { + case CHK__CHECK_INST_STATUS__CIS_RUNNING: + if (unlikely(iv->ci_phase < cbk->cb_phase)) { + rc = -DER_NOTAPPLICABLE; + } else if (iv->ci_phase != cbk->cb_phase) { + cbk->cb_phase = iv->ci_phase; + rc = chk_bk_update_engine(cbk); + if (rc == 0) + rc = chk_pools_update_bk(ins, iv->ci_phase); + } + break; + case CHK__CHECK_INST_STATUS__CIS_FAILED: + case CHK__CHECK_INST_STATUS__CIS_IMPLICATED: + /* Leader notifies the engine to exit. */ + ins->ci_implicated = 1; + chk_stop_sched(ins); + break; + case CHK__CHECK_INST_STATUS__CIS_COMPLETED: + /* + * Usually, the check leader will not notify its COMPLETE to check engines + * unless the check leader has not notified 'ci_orphan_done' yet. In that + * case, there should be no in-processing pools on check engines. + */ + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (!cpr->cpr_done && !cpr->cpr_skip && !cpr->cpr_stop) { /* NOTE(review): the message below has an opening quote before COMPLETED but no closing one. */ + D_ERROR(DF_ENGINE" there is at least one pool " + DF_UUID" in processing but leader 'COMPLETED\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid)); + D_GOTO(out, rc = -DER_PROTO); + } + } + break; + default: + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + } + /* -DER_NOTAPPLICABLE is logged at INFO level and reported to the caller as success. */ +out: + D_CDEBUG(rc != 0 && rc != -DER_NOTAPPLICABLE, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u got notification from rank %u, for pool " DF_UUIDF + ", phase %u, ins_status %u, pool_status %u, gen "DF_X64", seq "DF_X64": "DF_RC"\n", + DP_ENGINE(ins), dss_self_rank(), iv->ci_rank, DP_UUID(iv->ci_uuid), iv->ci_phase, + iv->ci_ins_status, iv->ci_pool_status, iv->ci_gen, iv->ci_seq, DP_RC(rc)); + + return (rc == 0 || rc == -DER_NOTAPPLICABLE) ?
0 : rc; +} + /* Try to rejoin the former check instance recorded in the engine bookmark. Nothing is done unless the bookmark is a valid engine bookmark in RUNNING or PAUSED status, this rank is not on the leader, and the ranks list is known. \a args is not referenced in the body. */ +void +chk_engine_rejoin(void *args) +{ + struct chk_instance *ins = chk_engine; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + uuid_t *pools = NULL; /* NOTE(review): filled by chk_rejoin_remote() and never released in this function; confirm who owns it. */ + struct chk_iv iv = { 0 }; + struct umem_attr uma = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_rank_t myrank = dss_self_rank(); + uint32_t pool_nr = 0; + uint32_t flags = 0; + int rc = 0; + int rc1; + bool need_join = false; + + if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE) + goto out_log; + + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING && + cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_PAUSED) + goto out_log; + + /* We do NOT support the leader (and its associated engine) rejoining the former check instance. */ + if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) + goto out_log; + + if (ins->ci_ranks == NULL) + goto out_log; + /* The instance is expected to be idle: not starting or stopping, no scheduler, no IV group or namespace, no trees. */ + D_ASSERT(ins->ci_starting == 0); + D_ASSERT(ins->ci_stopping == 0); + D_ASSERT(ins->ci_sched_running == 0); + D_ASSERT(ins->ci_iv_group == NULL); + D_ASSERT(ins->ci_iv_ns == NULL); + D_ASSERT(ins->ci_sched == ABT_THREAD_NULL); + D_ASSERT(daos_handle_is_inval(ins->ci_pool_hdl)); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); + D_ASSERT(daos_handle_is_inval(ins->ci_pending_hdl)); + + ins->ci_rejoining = 1; + ins->ci_starting = 1; + ins->ci_started = 0; + ins->ci_start_flags = 0; + /* From here on the result is logged at out_log. */ + need_join = true; + uma.uma_id = UMEM_CLASS_VMEM; + /* Create the in-place pool tree and pending tree with the UMEM_CLASS_VMEM attribute. */ + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_POOL, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pool_btr, &ins->ci_pool_hdl); + if (rc != 0) + goto out_tree; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_PA, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pending_btr, &ins->ci_pending_hdl); + if (rc != 0) + goto out_tree; + /* The secondary group is named after the IV UUID stored in the bookmark. */ + uuid_unparse_lower(cbk->cb_iv_uuid, uuid_str); + rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); + if (rc != 0) + goto out_tree; + + rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, cbk->cb_iv_uuid, ins->ci_iv_group, + &ins->ci_iv_id,
&ins->ci_iv_ns); + if (rc != 0) + goto out_group; + + ds_iv_ns_update(ins->ci_iv_ns, prop->cp_leader, ins->ci_iv_ns->iv_master_term + 1); + +again: + /* Ask the leader whether this engine can rejoin or not. */ + rc = chk_rejoin_remote(prop->cp_leader, cbk->cb_gen, myrank, cbk->cb_iv_uuid, &flags, + &pool_nr, &pools); + if (rc != 0) { /* -DER_OOG and -DER_GRPVER are retried after one second as long as ci_pause is not set. */ + if ((rc == -DER_OOG || rc == -DER_GRPVER) && !ins->ci_pause) { + D_INFO(DF_ENGINE" Someone is not ready %d, let's rejoin after 1 sec\n", + DP_ENGINE(ins), rc); + dss_sleep(1000); + if (!ins->ci_pause) + goto again; + } + + goto out_iv; + } + /* The leader returned no pool for this rank: clean up with rc = 1 and skip the rejoin log (need_join = false). */ + if (pool_nr == 0) { + need_join = false; + D_GOTO(out_iv, rc = 1); + } + + rc = chk_pools_load_list(ins, cbk->cb_gen, 0, pool_nr, pools, NULL); + if (rc != 0) + goto out_notify; + + rc = chk_engine_start_post(ins); + if (rc != 0) + goto out_stop; + + ins->ci_sched_running = 1; + /* Start the engine scheduler ULT; ci_sched_running is rolled back if the creation fails. */ + rc = dss_ult_create(chk_engine_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, + &ins->ci_sched); + if (rc != 0) { + ins->ci_sched_running = 0; + goto out_stop; + } + + if (flags & CRF_ORPHAN_DONE) + ins->ci_orphan_done = 1; + /* Success: jump over the cleanup labels so the trees, IV namespace and group stay in place. */ + goto out_log; + +out_stop: + chk_pool_stop_all(ins, CHK__CHECK_POOL_STATUS__CPS_IMPLICATED, NULL); + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + cbk->cb_time.ct_stop_time = time(NULL); + cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + rc1 = chk_bk_update_engine(cbk); + if (rc1 != 0) + D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n", + DP_ENGINE(ins), DP_RC(rc1)); + } +out_notify: + iv.ci_gen = cbk->cb_gen; + iv.ci_phase = cbk->cb_phase; + iv.ci_ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + iv.ci_to_leader = 1; + + /* Notify the leader that check instance exit on the engine.
*/ + rc1 = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE, true); + D_CDEBUG(rc1 != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" on rank %u notify leader for its exit, status %u: rc1 = %d\n", + DP_ENGINE(ins), myrank, cbk->cb_ins_status, rc1); +out_iv: + chk_iv_ns_cleanup(&ins->ci_iv_ns); +out_group: + if (ins->ci_iv_group != NULL) { + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } +out_tree: + chk_destroy_pending_tree(ins); + chk_destroy_pool_tree(ins); +out_log: /* Reached on every path: log only if a rejoin was attempted, then clear the rejoining and starting flags and mark the instance as inited. */ + if (need_join) + D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n", + DP_ENGINE(ins), myrank, DP_UUID(cbk->cb_iv_uuid), DP_RC(rc)); + ins->ci_rejoining = 0; + ins->ci_starting = 0; + ins->ci_inited = 1; +} + /* Pause the check engine: stop its scheduler; the pool list is asserted to be empty afterwards. */ +void +chk_engine_pause(void) +{ + struct chk_instance *ins = chk_engine; + + chk_stop_sched(ins); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); +} + /* Set up the chk_engine instance: initialize it, load the engine bookmark, turn a stale RUNNING status into PAUSED, then load the check property and ranks. Returns 0 on success; on failure the instance is released via chk_ins_fini(). */ +int +chk_engine_init(void) +{ + struct chk_traverse_pools_args ctpa = { 0 }; + struct chk_bookmark *cbk; + int rc; + + rc = chk_ins_init(&chk_engine); + if (rc != 0) + goto fini; + + chk_report_seq_init(chk_engine); + + /* + * DAOS global consistency check depends on the local consistency of all + * related engines. If some local data corruption is hit, then local + * consistency may not be guaranteed. We need to stop and resolve the + * related local inconsistency first. + */ + + cbk = &chk_engine->ci_bk; + rc = chk_bk_fetch_engine(cbk); + if (rc == -DER_NONEXIST) + goto prop; + + /* It may be caused by local data corruption, let's break.
*/ + if (rc != 0) + goto fini; + /* A zero magic is accepted; any other value that is not the engine magic is reported as corruption. */ + if (cbk->cb_magic != 0 && cbk->cb_magic != CHK_BK_MAGIC_ENGINE) { + D_ERROR("Hit corrupted engine bookmark on rank %u: %u vs %u\n", + dss_self_rank(), cbk->cb_magic, CHK_BK_MAGIC_ENGINE); + D_GOTO(fini, rc = -DER_IO); + } + + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + /* + * Leader crashed before exiting normally, reset the status as 'PAUSED' + * to avoid blocking the next CHK_START. + */ + cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_PAUSED; + cbk->cb_time.ct_stop_time = time(NULL); + rc = chk_bk_update_engine(cbk); + if (rc != 0) { + D_ERROR(DF_ENGINE" failed to reset status as 'PAUSED': "DF_RC"\n", + DP_ENGINE(chk_engine), DP_RC(rc)); + goto fini; + } + + ctpa.ctpa_gen = cbk->cb_gen; + rc = chk_traverse_pools(chk_pools_pause_cb, &ctpa); + /* + * Failing to reset the pool status does not affect the next check start, so it is + * not fatal, but the related check query result may confuse the user. + */ + if (rc != 0) + D_WARN(DF_ENGINE" failed to reset pools status as 'PAUSED': "DF_RC"\n", + DP_ENGINE(chk_engine), DP_RC(rc)); + } + +prop: /* rc is overwritten here; a missing property (-DER_NONEXIST) is not an error. */ + rc = chk_prop_fetch(&chk_engine->ci_prop, &chk_engine->ci_ranks); + if (rc == -DER_NONEXIST) + rc = 0; +fini: + if (rc != 0) + chk_ins_fini(&chk_engine); + return rc; +} + /* Release the chk_engine instance via chk_ins_fini(). */ +void +chk_engine_fini(void) +{ + chk_ins_fini(&chk_engine); +} diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h new file mode 100644 index 00000000000..a11fa5518ef --- /dev/null +++ b/src/chk/chk_internal.h @@ -0,0 +1,1232 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +/** + * DAOS global consistency checker RPC Protocol Definitions + */ + +#ifndef __CHK_INTERNAL_H__ +#define __CHK_INTERNAL_H__ + /* NOTE(review): the targets of the eleven include directives below are missing in this copy of the patch; restore them from the original change. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "chk.pb-c.h" + +/* + * RPC operation codes + * + * These are for daos_rpc::dr_opc and DAOS_RPC_OPCODE(opc, ...)
rather than + * crt_req_create(..., opc, ...). See daos/rpc.h. + */ +#define DAOS_CHK_VERSION 1 + /* X-macro list of the check RPCs. Each entry names the opcode, a second field that is 0 for every RPC here, the CQF_* request format, the ds_chk_*_hdlr handler and the *_co_ops pointer, which is NULL for CHK_POOL_MBS, CHK_REPORT and CHK_REJOIN. */ +#define CHK_PROTO_SRV_RPC_LIST \ + X(CHK_START, \ + 0, &CQF_chk_start, ds_chk_start_hdlr, &chk_start_co_ops), \ + X(CHK_STOP, \ + 0, &CQF_chk_stop, ds_chk_stop_hdlr, &chk_stop_co_ops), \ + X(CHK_QUERY, \ + 0, &CQF_chk_query, ds_chk_query_hdlr, &chk_query_co_ops), \ + X(CHK_MARK, \ + 0, &CQF_chk_mark, ds_chk_mark_hdlr, &chk_mark_co_ops), \ + X(CHK_ACT, \ + 0, &CQF_chk_act, ds_chk_act_hdlr, &chk_act_co_ops), \ + X(CHK_CONT_LIST, \ + 0, &CQF_chk_cont_list, ds_chk_cont_list_hdlr, &chk_cont_list_co_ops), \ + X(CHK_POOL_START, \ + 0, &CQF_chk_pool_start, ds_chk_pool_start_hdlr, &chk_pool_start_co_ops),\ + X(CHK_POOL_MBS, \ + 0, &CQF_chk_pool_mbs, ds_chk_pool_mbs_hdlr, NULL), \ + X(CHK_REPORT, \ + 0, &CQF_chk_report, ds_chk_report_hdlr, NULL), \ + X(CHK_REJOIN, \ + 0, &CQF_chk_rejoin, ds_chk_rejoin_hdlr, NULL) + +/* Expand each list entry to its opcode name only, to populate the enum below. */ +#define X(a, b, c, d, e) a + /* One enumerator per RPC in list order, followed by CHK_PROTO_SRV_RPC_COUNT. */ +enum chk_rpc_opc { + CHK_PROTO_SRV_RPC_LIST, + CHK_PROTO_SRV_RPC_COUNT, +}; + +#undef X + /* Element type of the cpmi_targets array carried by CHK_POOL_MBS. Presumably cpm_tgt_status holds cpm_tgt_nr entries for rank cpm_rank; confirm against the users. */ +struct chk_pool_mbs { + d_rank_t cpm_rank; + uint32_t cpm_tgt_nr; + uint32_t *cpm_tgt_status; +}; + +/* + * CHK_START: + * From check leader to check engine to start the check instance on specified pool(s) or all pools.
+ */ +#define DAOS_ISEQ_CHK_START \ + ((uint64_t) (csi_gen) CRT_VAR) \ + ((uint32_t) (csi_flags) CRT_VAR) \ + ((int32_t) (csi_phase) CRT_VAR) \ + ((d_rank_t) (csi_leader_rank) CRT_VAR) \ + ((uint32_t) (csi_api_flags) CRT_VAR) \ + ((uuid_t) (csi_iv_uuid) CRT_VAR) \ + ((d_rank_t) (csi_ranks) CRT_ARRAY) \ + ((struct chk_policy) (csi_policies) CRT_ARRAY) \ + ((uuid_t) (csi_uuids) CRT_ARRAY) + +#define DAOS_OSEQ_CHK_START \ + ((int32_t) (cso_status) CRT_VAR) \ + ((uint32_t) (cso_rank_cap) CRT_VAR) \ + ((uint32_t) (cso_clue_cap) CRT_VAR) \ + ((int32_t) (cso_padding) CRT_VAR) \ + ((d_rank_t) (cso_cmp_ranks) CRT_ARRAY) \ + ((struct ds_pool_clue) (cso_clues) CRT_ARRAY) + +CRT_RPC_DECLARE(chk_start, DAOS_ISEQ_CHK_START, DAOS_OSEQ_CHK_START); + +/* + * CHK_STOP: + * From check leader to check engine to stop the check instance on specified pool(s) or all pools. + * NOTE: the csi_ and cso_ field prefixes are the same as the ones used by CHK_START. + */ +#define DAOS_ISEQ_CHK_STOP \ + ((uint64_t) (csi_gen) CRT_VAR) \ + ((uuid_t) (csi_uuids) CRT_ARRAY) + +#define DAOS_OSEQ_CHK_STOP \ + ((int32_t) (cso_status) CRT_VAR) \ + ((uint32_t) (cso_flags) CRT_VAR) \ + ((uint32_t) (cso_cap) CRT_VAR) \ + ((int32_t) (cso_padding) CRT_VAR) \ + ((d_rank_t) (cso_ranks) CRT_ARRAY) + +CRT_RPC_DECLARE(chk_stop, DAOS_ISEQ_CHK_STOP, DAOS_OSEQ_CHK_STOP); + +/* + * CHK_QUERY: + * From check leader to check engine to query the check process for specified pool(s) or all pools. + */ +#define DAOS_ISEQ_CHK_QUERY \ + ((uint64_t) (cqi_gen) CRT_VAR) \ + ((uuid_t) (cqi_uuids) CRT_ARRAY) + +#define DAOS_OSEQ_CHK_QUERY \ + ((int32_t) (cqo_status) CRT_VAR) \ + ((uint32_t) (cqo_cap) CRT_VAR) \ + ((uint32_t) (cqo_ins_status) CRT_VAR) \ + ((uint32_t) (cqo_ins_phase) CRT_VAR) \ + ((uint64_t) (cqo_gen) CRT_VAR) \ + ((struct chk_query_pool_shard) (cqo_shards) CRT_ARRAY) + +CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY); + +/* + * CHK_MARK: + * From check leader to check engine to mark some rank as "dead".
Under check mode, if some rank + * is dead (and failed to rejoin), it will not be excluded from the related pool map, to avoid + * further damaging the system; instead, it will be marked as "dead" by the check instance and + * the check status on related pool(s) will be marked as "failed". + */ +#define DAOS_ISEQ_CHK_MARK \ + ((uint64_t) (cmi_gen) CRT_VAR) \ + ((d_rank_t) (cmi_rank) CRT_VAR) \ + ((uint32_t) (cmi_version) CRT_VAR) + +#define DAOS_OSEQ_CHK_MARK \ + ((int32_t) (cmo_status) CRT_VAR) \ + ((uint32_t) (cmo_padding) CRT_VAR) + +CRT_RPC_DECLARE(chk_mark, DAOS_ISEQ_CHK_MARK, DAOS_OSEQ_CHK_MARK); + +/* + * CHK_ACT: + * From check leader to check engine to execute the admin-specified repair action for a + * formerly reported inconsistency under interaction mode. + */ +#define DAOS_ISEQ_CHK_ACT \ + ((uint64_t) (cai_gen) CRT_VAR) \ + ((uint64_t) (cai_seq) CRT_VAR) \ + ((uint32_t) (cai_cla) CRT_VAR) \ + ((uint32_t) (cai_act) CRT_VAR) \ + ((uint32_t) (cai_flags) CRT_VAR) \ + ((uint32_t) (cai_padding) CRT_VAR) + +#define DAOS_OSEQ_CHK_ACT \ + ((int32_t) (cao_status) CRT_VAR) \ + ((uint32_t) (cao_padding) CRT_VAR) + +CRT_RPC_DECLARE(chk_act, DAOS_ISEQ_CHK_ACT, DAOS_OSEQ_CHK_ACT); + +/* + * CHK_CONT_LIST: + * From PS leader to check engine to get the container list. + */ +#define DAOS_ISEQ_CHK_CONT_LIST \ + ((uint64_t) (ccli_gen) CRT_VAR) \ + ((d_rank_t) (ccli_rank) CRT_VAR) \ + ((uint32_t) (ccli_padding) CRT_VAR) \ + ((uuid_t) (ccli_pool) CRT_VAR) + +#define DAOS_OSEQ_CHK_CONT_LIST \ + ((int32_t) (cclo_status) CRT_VAR) \ + ((uint32_t) (cclo_cap) CRT_VAR) \ + ((uuid_t) (cclo_conts) CRT_ARRAY) + +CRT_RPC_DECLARE(chk_cont_list, DAOS_ISEQ_CHK_CONT_LIST, DAOS_OSEQ_CHK_CONT_LIST); + +/* + * CHK_POOL_START: + * From check leader to check engine to start the pool shard.
+ */ +#define DAOS_ISEQ_CHK_POOL_START \ + ((uint64_t) (cpsi_gen) CRT_VAR) \ + ((uuid_t) (cpsi_pool) CRT_VAR) \ + ((uint32_t) (cpsi_phase) CRT_VAR) \ + ((uint32_t) (cpsi_flags) CRT_VAR) + +#define DAOS_OSEQ_CHK_POOL_START \ + ((int32_t) (cpso_status) CRT_VAR) \ + ((uint32_t) (cpso_rank) CRT_VAR) + +CRT_RPC_DECLARE(chk_pool_start, DAOS_ISEQ_CHK_POOL_START, DAOS_OSEQ_CHK_POOL_START); + +/* + * CHK_POOL_MBS: + * From check leader to check engine to notify the pool members. + * + * NOTE(review): the last input field (cpmi_targets) ends with a line-continuation backslash + * that is followed by an empty line. It is harmless as written but fragile if a line is + * ever added right after it. + */ +#define DAOS_ISEQ_CHK_POOL_MBS \ + ((uint64_t) (cpmi_gen) CRT_VAR) \ + ((uuid_t) (cpmi_pool) CRT_VAR) \ + ((uint32_t) (cpmi_flags) CRT_VAR) \ + ((uint32_t) (cpmi_phase) CRT_VAR) \ + ((d_string_t) (cpmi_label) CRT_VAR) \ + ((uint64_t) (cpmi_label_seq) CRT_VAR) \ + ((struct chk_pool_mbs) (cpmi_targets) CRT_ARRAY) \ + +#define DAOS_OSEQ_CHK_POOL_MBS \ + ((int32_t) (cpmo_status) CRT_VAR) \ + ((uint32_t) (cpmo_padding) CRT_VAR) \ + ((struct rsvc_hint) (cpmo_hint) CRT_VAR) + +CRT_RPC_DECLARE(chk_pool_mbs, DAOS_ISEQ_CHK_POOL_MBS, DAOS_OSEQ_CHK_POOL_MBS); + +/* + * CHK_REPORT: + * From check engine to check leader to report the inconsistency and the related repair action + * and result. It may require interaction with the admin to decide how to handle
+ * the inconsistency. + * + * NOTE(review): the output sequence below also ends with a line-continuation backslash that + * is followed by an empty line. + */ +#define DAOS_ISEQ_CHK_REPORT \ + ((uint64_t) (cri_gen) CRT_VAR) \ + ((uint32_t) (cri_ics_class) CRT_VAR) \ + ((uint32_t) (cri_ics_action) CRT_VAR) \ + ((int32_t) (cri_ics_result) CRT_VAR) \ + ((d_rank_t) (cri_rank) CRT_VAR) \ + ((uint32_t) (cri_target) CRT_VAR) \ + ((uint32_t) (cri_padding) CRT_VAR) \ + ((uint64_t) (cri_seq) CRT_VAR) \ + ((uuid_t) (cri_pool) CRT_VAR) \ + ((d_string_t) (cri_pool_label) CRT_VAR) \ + ((uuid_t) (cri_cont) CRT_VAR) \ + ((d_string_t) (cri_cont_label) CRT_VAR) \ + ((daos_unit_oid_t) (cri_obj) CRT_RAW) \ + ((daos_key_t) (cri_dkey) CRT_VAR) \ + ((daos_key_t) (cri_akey) CRT_VAR) \ + ((d_string_t) (cri_msg) CRT_VAR) \ + ((uint32_t) (cri_options) CRT_ARRAY) \ + ((d_sg_list_t) (cri_details) CRT_ARRAY) + +#define DAOS_OSEQ_CHK_REPORT \ + ((int32_t) (cro_status) CRT_VAR) \ + ((uint32_t) (cro_padding) CRT_VAR) \ + +CRT_RPC_DECLARE(chk_report, DAOS_ISEQ_CHK_REPORT, DAOS_OSEQ_CHK_REPORT); + +/* + * CHK_REJOIN: + * From check engine to check leader to require rejoin former check instance after the engine + * restart under check mode.
+ */ +#define DAOS_ISEQ_CHK_REJOIN \ + ((uint64_t) (cri_gen) CRT_VAR) \ + ((d_rank_t) (cri_rank) CRT_VAR) \ + ((uint32_t) (cri_padding) CRT_VAR) \ + ((uuid_t) (cri_iv_uuid) CRT_VAR) + +#define DAOS_OSEQ_CHK_REJOIN \ + ((int32_t) (cro_status) CRT_VAR) \ + ((uint32_t) (cro_flags) CRT_VAR) \ + ((uuid_t) (cro_pools) CRT_ARRAY) + +CRT_RPC_DECLARE(chk_rejoin, DAOS_ISEQ_CHK_REJOIN, DAOS_OSEQ_CHK_REJOIN); + +/* dkey for check DB under sys_db */ +#define CHK_DB_TABLE "chk" + +/* akey for leader bookmark under CHK_DB_TABLE */ +#define CHK_BK_LEADER "leader" + +/* akey for engine bookmark under CHK_DB_TABLE */ +#define CHK_BK_ENGINE "engine" + +/* akey for check property under CHK_DB_TABLE */ +#define CHK_PROPERTY "property" + +/* akey for the list of ranks under CHK_DB_TABLE */ +#define CHK_RANKS "ranks" + +#define CHK_BK_MAGIC_LEADER 0xe6f703da +#define CHK_BK_MAGIC_ENGINE 0xe6f703db +#define CHK_BK_MAGIC_POOL 0xe6f703dc + +#define CHK_INVAL_PHASE (uint32_t)(-1) +#define CHK_LEADER_RANK (uint32_t)(-1) + +/* + * Keep the lowest 20-bits of DAOS engine rank in the check report sequence. + * If the count of DAOS engines exceeds 2 ^ 20, then different check engines + * may generate the same sequence for different check reports. Such a conflict + * is not fatal for a non-interaction report. As for an interaction report, the + * check leader will detect such a report sequence conflict and ask the related + * engine(s) to generate new sequence(s). + * + * NOTE(review): CHK_REPORT_RANK_BIT is 40, which leaves 24 bits above the + * sequence mask in a 64-bit value, while the text above says 20 bits. Confirm + * against chk_report_seq_gen(). + */ +#define CHK_REPORT_RANK_BIT 40 +#define CHK_REPORT_SEQ_MASK ((1ULL << CHK_REPORT_RANK_BIT) - 1) + +#define CHK_BTREE_ORDER 16 + +#define CHK_MSG_BUFLEN 320 + +/* + * NOTE: Please be careful when changing CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN + * to avoid a hole in the struct chk_property.
+ */ +#define CHK_POLICY_MAX (CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN + 1) + +struct chk_co_rpc_cb_args { + void *cb_priv; + uint64_t cb_gen; + int cb_result; + uint32_t cb_flags; + uint32_t cb_ins_status; + uint32_t cb_ins_phase; + uint32_t cb_rank; + uint32_t cb_nr; + void *cb_data; +}; + +typedef int (*chk_co_rpc_cb_t)(struct chk_co_rpc_cb_args *cb_args); + +typedef void (*chk_pool_free_data_t)(void *data); + +enum chk_start_flags { + /* Reset all check bookmarks, for leader, engines and all pools. */ + CSF_RESET_ALL = 1, + /* Reset the pools whose check is not completed. */ + CSF_RESET_NONCOMP = 2, + /* Handle orphan pools. */ + CSF_ORPHAN_POOL = 4, +}; + +enum chk_stop_flags { + /* The check on some pools has been stopped. */ + CSF_POOL_STOPPED = 1, +}; + +enum chk_act_flags { + /* The action is applicable to the same kind of inconsistency. */ + CAF_FOR_ALL = 1, +}; + +enum chk_mbs_flags { + CMF_REPAIR_LABEL = 1, +}; + +enum chk_pool_start_flags { + /* The pool is not in check list, but it is reported by engine for potential orphan pool. */ + CPSF_FOR_ORPHAN = 1, + /* Do not export pool service after check done. */ + CPSF_NOT_EXPORT_PS = 2, +}; + +enum chk_rejoin_flags { + CRF_ORPHAN_DONE = 1, +}; + +/* + * Each check instance has a unique leader engine that uses key "chk/leader" under its local + * sys_db to trace the check instance. + * + * For each engine, including the leader engine, there is a system level key "chk/engine" under + * the engine's local sys_db to trace the check instance on the engine. When the server (re)starts, + * the check module uses it to determine whether it needs to rejoin the check instance. + * + * For each pool, there is a key "chk/$pool_uuid" under the engine's local sys_db to trace + * the check process for the pool on the related engine.
+ */ +struct chk_bookmark { + uint32_t cb_magic; + uint32_t cb_version; + uint64_t cb_gen; + uuid_t cb_iv_uuid; + Chk__CheckScanPhase cb_phase; + union { + Chk__CheckInstStatus cb_ins_status; + Chk__CheckPoolStatus cb_pool_status; + }; + /* + * For leader bookmark, it is the inconsistency statistics during the phases range + * [CSP_PREPARE, CSP_POOL_LIST] for the whole system. The inconsistency and related + * repair during these phases may be on MS leader, not related with any engine. + * + * For pool bookmark, it is the inconsistency statistics during the phases range + * [CSP_POOL_MBS, CSP_CONT_CLEANUP] for the pool. The inconsistency and related + * repair during these phases is applied to the PS leader. + */ + struct chk_statistics cb_statistics; + struct chk_time cb_time; +}; + +/* + * On each engine (including the leader), there is a key "chk/property" under its local sys_db. + * That is shared by all the pools for current check instance. + * + * DAOS check property is persistent. Unless a new property is specified to overwrite the old + * one when the check starts, the former property is reused for the current check instance. + * + * + * About the leader: + * + * The leader bookmark and global pools' traces are only stored on current check leader. So if + * we switch to new check leader for current check instance, we will lose those former traces. + * Then we will have to rescan the whole system from scratch when switching to new check leader. + * + * + * About some flags: + * + * - CHK__CHECK_FLAG__CF_RESET + * + * If 'reset' flag is specified together with pool list when the check starts, then it only + * makes the check against the specified pools rescan from the beginning. + * + * If 'reset' flag is specified without pool list when the check starts, then all pools in + * the system will be affected with rescanning from scratch. + * + * The 'reset' flag is not stored in the check property persistently.
It is per instance, and + * only affects current check start. When you restart DAOS check next time without explicitly + * specifying 'reset' flag, you will reuse former check property and resume the scan from former + * pause/stop phase. + * + * The 'reset' flag does not affect check property. To change the check property, overwrite + * the related property explicitly when the check starts. + * + * NOTE: If a pool has been 'checked' (as CHK__CHECK_SCAN_PHASE__CSP_DONE) in former instance, + * then current check instance will skip it directly unless 'reset' flag is explicitly + * set or reset is triggered for other reason, such as check ranks changes. + * + * - CHK__CHECK_FLAG__CF_DRYRUN + * + * To simplify the logic, dryrun mode is per system, not per pool. That means if dryrun flag is + * specified when the check starts, then all non-completed pools' check will be dryrun mode + * regardless of whether a pool is in current instance check list or not. + * + * Under dryrun mode, we do not really repair the found inconsistency, then we will lose former + * stable base if we want to resume DAOS check from former pause/stop point. So if former check + * instance ran under dryrun mode, then current check start will be handled as 'reset' for all + * pools regardless of whether current instance is dryrun mode or not. + * + * NOTE: Considering above behavior, although the 'dryrun' flag is stored persistently, it is + * per instance, and only affects current check instance. + * + * - CHK__CHECK_FLAG__CF_ORPHAN_POOL + * + * Handling orphan pools requires all check engines to report their known pools (shards), then + * compare the list with the MS known ones. But for most of the time, the check instance may + * only drive the check against some specified pool(s). So we offer two ways to trigger the + * handling of orphan pools: + * + * 1.
Anytime when the check is (re)started from scratch for all pools, regardless of whether + * it is for 'reset' flag without pool list or other reason, such as check ranks changes. + * + * 2. Explicitly specify 'orphan' flag when the check starts, regardless of whether it is for + * all pools or just against the specified pool list. + * + * NOTE: Similar to 'reset' flag, the 'orphan' flag is also not stored persistently, instead, + * it only affects current check instance. + * + * + * About the policies: + * + * The repair policies are shared among all pools. For some specified inconsistency, its repair + * policy may be changed during the check scan via CHECK_ACT dRPC downcall with 'for_all' flag. + * + * If no policies are specified when the check starts, the former policies will be reused. + * Currently, setting the policy for just one inconsistency class is not supported, which means + * that either all are specified (to overwrite) or none. That can be improved in the future. + * + * + * About the ranks: + * + * The changes for the ranks that take part in the check mean potential pools' membership + * changes. It will affect former non-completed pools' check. Currently, to simplify the logic, + * if current check ranks do not match former ones, then current check start will be handled as + * 'reset' for all pools. + */ +struct chk_property { + d_rank_t cp_leader; + Chk__CheckFlag cp_flags; + Chk__CheckInconsistAction cp_policies[CHK_POLICY_MAX]; + /* + * NOTE: Reserved to support continuing the check until a specified phase in the + * future. -1 means to check all phases. + */ + int32_t cp_phase; + /* How many ranks (ever or should) take part in the check instance. */ + uint32_t cp_rank_nr; +}; + +/* + * For each check instance, there are one leader instance and 1 ~ N engine instances. + * For each rank, there can be at most one leader instance and one engine instance.
+ * + * Currently, we do not support to run multiple check instances in the system (even + * if they are on different ranks sets) at the same time. If multiple pools need to + * be checked, then please either specify their uuids together (or not specify pool + * option, then check all pools by default) via single "dmg check" command, or wait + * one check instance done and then start next. + */ +struct chk_instance { + struct chk_bookmark ci_bk; + struct chk_property ci_prop; + + struct btr_root ci_rank_btr; + daos_handle_t ci_rank_hdl; + d_list_t ci_rank_list; + /* Pool records: a tree looked up by pool UUID (see chk_engine_report()) plus a list of chk_pool_rec. */ + struct btr_root ci_pool_btr; + daos_handle_t ci_pool_hdl; + d_list_t ci_pool_list; + /* Tree of pending records; the engine creates it with DBTREE_CLASS_CHK_PA in chk_engine_rejoin(). */ + struct btr_root ci_pending_btr; + daos_handle_t ci_pending_hdl; + + d_list_t ci_pool_shutdown_list; + + /* The slowest phase for the failed pool or rank. */ + uint32_t ci_slowest_fail_phase; + /* IV namespace and the secondary group it is created on; used for check notifications. */ + uint32_t ci_iv_id; + struct ds_iv_ns *ci_iv_ns; + crt_group_t *ci_iv_group; + /* Ranks list loaded together with the property by chk_prop_fetch(). */ + d_rank_list_t *ci_ranks; + + /* The dead ranks to be processed by the leader. Protected by ci_abt_mutex. */ + d_list_t ci_dead_ranks; + + ABT_thread ci_sched; + ABT_rwlock ci_abt_lock; + ABT_mutex ci_abt_mutex; + ABT_cond ci_abt_cond; + + /* Generator for report event, pending repair actions, and so on. */ + uint64_t ci_seq; + + uint32_t ci_is_leader:1, + ci_sched_running:1, + ci_sched_exiting:1, + ci_for_orphan:1, + ci_orphan_done:1, /* leader has processed orphan pools. */ + ci_pool_stopped:1, /* check on some pools has been stopped. */ + ci_starting:1, + ci_stopping:1, + ci_started:1, + ci_inited:1, + ci_pause:1, + ci_rejoining:1, + ci_implicated:1; /* set by chk_engine_notify() when the leader reports FAILED or IMPLICATED. */ + uint32_t ci_start_flags; +}; + /* Content of a check notification, as consumed by chk_engine_notify() and chk_leader_notify() and sent with chk_iv_update(). */ +struct chk_iv { + uint64_t ci_gen; + uint64_t ci_seq; + uuid_t ci_uuid; + d_rank_t ci_rank; + uint32_t ci_phase; + uint32_t ci_ins_status; + uint32_t ci_pool_status; + uint32_t ci_to_leader:1, /* To check leader. */ + ci_pool_destroyed:1, /* Pool has been destroyed. */ + ci_from_psl:1; /* From pool service leader. */ +}; + +/* Check engine uses it to trace pools.
Query logic uses it to organize the result. */ +struct chk_pool_shard { + /* Link into chk_pool_rec::cpr_shard_list. */ + d_list_t cps_link; + d_rank_t cps_rank; + void *cps_data; /* opaque per-shard data; presumably released through cps_free_cb - confirm. */ + chk_pool_free_data_t cps_free_cb; +}; + +/* Per-pool record. Check engine uses it to trace pools. Query logic uses it to organize the result. */ +struct chk_pool_rec { + /* Link into chk_instance::ci_pool_list. */ + d_list_t cpr_link; + /* Link into chk_instance::ci_pool_shutdown_list. */ + d_list_t cpr_shutdown_link; + /* The list of chk_pool_shard. */ + d_list_t cpr_shard_list; + /* The list of chk_pending_rec. */ + d_list_t cpr_pending_list; + uint32_t cpr_shard_nr; + uint32_t cpr_started:1, + cpr_start_post:1, + cpr_stop:1, + cpr_done:1, + cpr_skip:1, + cpr_dangling:1, + cpr_for_orphan:1, + cpr_notified_exit:1, + cpr_destroyed:1, + cpr_healthy:1, + cpr_delay_label:1, + cpr_exist_on_ms:1, + cpr_not_export_ps:1, + cpr_map_refreshed:1; + int cpr_advice; + int cpr_refs; + uuid_t cpr_uuid; + ABT_thread cpr_thread; + struct ds_pool_clues cpr_clues; + struct ds_pool_clue *cpr_clue; + struct chk_bookmark cpr_bk; /* per-pool bookmark; chk_engine_report() switches cb_pool_status between PENDING and CHECKING. */ + struct chk_instance *cpr_ins; + struct chk_pool_mbs *cpr_mbs; + char *cpr_label; + uint64_t cpr_label_seq; + ABT_mutex cpr_mutex; + ABT_cond cpr_cond; +}; + /* One report that waits for a decision. NOTE: it uses the same cpr_ field prefix as struct chk_pool_rec. */ +struct chk_pending_rec { + /* Link into chk_pool_rec::cpr_pending_list. */ + d_list_t cpr_pool_link; + /* Link into chk_rank_rec::crr_pending_list.
*/ + d_list_t cpr_rank_link; + uuid_t cpr_uuid; + uint64_t cpr_seq; + d_rank_t cpr_rank; + uint32_t cpr_class; + uint32_t cpr_action; /* chk_engine_report() waits while this is CIA_INTERACT and returns any other value as the decision. */ + uint32_t cpr_busy:1, + cpr_exiting:1, /* makes a waiting chk_engine_report() give up with a positive return value. */ + cpr_on_leader:1; + ABT_mutex cpr_mutex; /* held around the cpr_action checks and the wait on cpr_cond. */ + ABT_cond cpr_cond; +}; + /* Description of one inconsistency report; the fields are forwarded one by one to chk_report_remote() by chk_engine_report(). */ +struct chk_report_unit { + uint64_t cru_gen; + uint32_t cru_cla; + uint32_t cru_act; + uint32_t cru_target; + d_rank_t cru_rank; + uint32_t cru_option_nr; + uint32_t cru_detail_nr; + uuid_t *cru_pool; /* chk_engine_report() asserts that this is not NULL. */ + char *cru_pool_label; + uuid_t *cru_cont; + char *cru_cont_label; + daos_unit_oid_t *cru_obj; + daos_key_t *cru_dkey; + daos_key_t *cru_akey; + char *cru_msg; + uint32_t *cru_options; + d_sg_list_t *cru_details; + uint32_t cru_sugg; + uint32_t cru_result; /* NOTE(review): unsigned here, but passed as the int result of chk_report_remote() and logged with %d. */ +}; + /* Argument passed to chk_traverse_pools() callbacks such as chk_pools_pause_cb(); chk_engine_init() only fills ctpa_gen. */ +struct chk_traverse_pools_args { + uint64_t ctpa_gen; + struct chk_instance *ctpa_ins; + uint32_t ctpa_status; + uint32_t ctpa_phase; +}; + +struct chk_dead_rank { + /* Link into chk_instance::ci_dead_ranks. */ + d_list_t cdr_link; + d_rank_t cdr_rank; +}; + +extern struct crt_proto_format chk_proto_fmt; + /* Collective RPC ops referenced by the entries of CHK_PROTO_SRV_RPC_LIST. */ +extern struct crt_corpc_ops chk_start_co_ops; +extern struct crt_corpc_ops chk_stop_co_ops; +extern struct crt_corpc_ops chk_query_co_ops; +extern struct crt_corpc_ops chk_mark_co_ops; +extern struct crt_corpc_ops chk_act_co_ops; +extern struct crt_corpc_ops chk_cont_list_co_ops; +extern struct crt_corpc_ops chk_pool_start_co_ops; + +extern btr_ops_t chk_pool_ops; +extern btr_ops_t chk_pending_ops; +extern btr_ops_t chk_rank_ops; +extern btr_ops_t chk_cont_ops; + +/* chk_common.c */ + +void chk_ranks_dump(uint32_t rank_nr, d_rank_t *ranks); + +void chk_pools_dump(d_list_t *head, int pool_nr, uuid_t pools[]); + +void chk_pool_remove_nowait(struct chk_pool_rec *cpr); + +void chk_pool_start_svc(struct chk_pool_rec *cpr, int *ret); + /* The callers visible in chk_engine.c pass NULL as ret to both stop functions. */ +void chk_pool_stop_one(struct chk_instance *ins, uuid_t uuid, int status, uint32_t phase, int *ret); + +void chk_pool_stop_all(struct chk_instance *ins, uint32_t status, int *ret); + +int chk_pools_pause_cb(struct sys_db *db, char *table,
d_iov_t *key, void *args); + +int chk_pools_cleanup_cb(struct sys_db *db, char *table, d_iov_t *key, void *args); + +int chk_pool_start_one(struct chk_instance *ins, uuid_t uuid, uint64_t gen); + /* chk_engine_rejoin() calls this with flags 0 and a NULL phase. */ +int chk_pools_load_list(struct chk_instance *ins, uint64_t gen, uint32_t flags, + int pool_nr, uuid_t pools[], uint32_t *phase); + +int chk_pools_load_from_db(struct sys_db *db, char *table, d_iov_t *key, void *args); + /* Called by chk_engine_notify() after the engine bookmark has been moved to a new phase. */ +int chk_pools_update_bk(struct chk_instance *ins, uint32_t phase); + /* Called by chk_engine_notify() for notifications that carry a non-null pool UUID. */ +int chk_pool_handle_notify(struct chk_instance *ins, struct chk_iv *iv); + +int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, + struct chk_bookmark *bk, struct chk_instance *ins, + uint32_t *shard_nr, void *data, chk_pool_free_data_t free_cb, + struct chk_pool_rec **cpr); + +void chk_pool_shard_cleanup(struct chk_instance *ins); + /* A -DER_AGAIN return makes chk_engine_report() retry with a newly generated seq; that caller passes a NULL rank_head. */ +int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, + uint64_t seq, uint32_t rank, uint32_t cla, struct chk_pending_rec **cpr); + /* chk_engine_report() tolerates -DER_NONEXIST from this call. */ +int chk_pending_del(struct chk_instance *ins, uint64_t seq, bool locked, + struct chk_pending_rec **cpr); + +int chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr); + +void chk_pending_destroy(struct chk_pending_rec *cpr); + +int chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase, + uint32_t policy_nr, struct chk_policy *policies, + d_rank_list_t *ranks, struct chk_property *prop); + +uint32_t chk_pool_merge_status(uint32_t status_a, uint32_t status_b); + +void chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_dst, + uint32_t phase_src, uint64_t *gen_dst, uint64_t gen_src); + /* Paired init and fini of a check instance; chk_engine_init() and chk_engine_fini() call them on chk_engine. */ +int chk_ins_init(struct chk_instance **p_ins); + +void chk_ins_fini(struct chk_instance **p_ins); + +/* chk_engine.c */ + +int chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, + uuid_t pools[], uint32_t api_flags, int phase, d_rank_t
leader, + uint32_t flags, uuid_t iv_uuid, struct ds_pool_clues *clues); + +int chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags); + +int chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status, + uint32_t *ins_phase, uint32_t *shard_nr, struct chk_query_pool_shard **shards, + uint64_t *l_gen); + +int chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version); + +int chk_engine_act(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, uint32_t flags); + +int chk_engine_cont_list(uint64_t gen, uuid_t pool_uuid, uuid_t **conts, uint32_t *count); + +int chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags); + +int chk_engine_pool_mbs(uint64_t gen, uuid_t uuid, uint32_t phase, const char *label, uint64_t seq, + uint32_t flags, uint32_t mbs_nr, struct chk_pool_mbs *mbs_array, + struct rsvc_hint *hint); + +int chk_engine_notify(struct chk_iv *iv); + +void chk_engine_rejoin(void *args); + +void chk_engine_pause(void); + +int chk_engine_init(void); + +void chk_engine_fini(void); + +/* chk_iv.c */ + +int chk_iv_update(void *ns, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode, bool retry); + +int chk_iv_init(void); + +int chk_iv_fini(void); + +/* chk_leader.c */ + +bool chk_is_on_leader(uint64_t gen, d_rank_t leader, bool known_leader); + +struct ds_iv_ns *chk_leader_get_iv_ns(void); + +int chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision); + +int chk_leader_notify(struct chk_iv *iv); + +int chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, int *pool_nr, + uuid_t **pools); + +void chk_leader_pause(void); + +int chk_leader_init(void); + +void chk_leader_fini(void); + +/* chk_rpc.c */ + +int chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, + uuid_t pools[], uint32_t api_flags, int phase, d_rank_t leader, uint32_t 
flags, + uuid_t iv_uuid, chk_co_rpc_cb_t start_cb, void *args); + +int chk_stop_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t pools[], + chk_co_rpc_cb_t stop_cb, void *args); + +int chk_query_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t pools[], + chk_co_rpc_cb_t query_cb, void *args); + +int chk_mark_remote(d_rank_list_t *rank_list, uint64_t gen, d_rank_t rank, uint32_t version); + +int chk_act_remote(d_rank_list_t *rank_list, uint64_t gen, uint64_t seq, uint32_t cla, + uint32_t act, d_rank_t rank, bool for_all); + +int chk_cont_list_remote(struct ds_pool *pool, uint64_t gen, chk_co_rpc_cb_t list_cb, void *args); + +int chk_pool_start_remote(d_rank_list_t *rank_list, uint64_t gen, uuid_t uuid, uint32_t phase, + uint32_t flags); + +int chk_pool_mbs_remote(d_rank_t rank, uint32_t phase, uint64_t gen, uuid_t uuid, char *label, + uint64_t seq, uint32_t flags, uint32_t mbs_nr, + struct chk_pool_mbs *mbs_array, struct rsvc_hint *hint); + +int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, int result, + d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, + uuid_t *cont, char *cont_label, daos_unit_oid_t *obj, daos_key_t *dkey, + daos_key_t *akey, char *msg, uint32_t option_nr, uint32_t *options, + uint32_t detail_nr, d_sg_list_t *details, uint64_t seq); + +int chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, + uint32_t *pool_nr, uuid_t **pools); + +/* chk_upcall.c */ + +int chk_report_upcall(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, int result, + d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, + uuid_t *cont, char *cont_label, daos_unit_oid_t *obj, daos_key_t *dkey, + daos_key_t *akey, char *msg, uint32_t option_nr, uint32_t *options, + uint32_t detail_nr, d_sg_list_t *details); + +/* chk_vos.c */ + +int chk_bk_fetch_leader(struct chk_bookmark *cbk); + +int chk_bk_update_leader(struct chk_bookmark *cbk); + 
+int chk_bk_delete_leader(void); + +int chk_bk_fetch_engine(struct chk_bookmark *cbk); + +int chk_bk_update_engine(struct chk_bookmark *cbk); + +int chk_bk_delete_engine(void); + +int chk_bk_fetch_pool(struct chk_bookmark *cbk, char *uuid_str); + +int chk_bk_update_pool(struct chk_bookmark *cbk, char *uuid_str); + +int chk_bk_delete_pool(char *uuid_str); + +int chk_prop_fetch(struct chk_property *cpp, d_rank_list_t **rank_list); + +int chk_prop_update(struct chk_property *cpp, d_rank_list_t *rank_list); + +int chk_traverse_pools(sys_db_trav_cb_t cb, void *args); + +void chk_vos_init(void); + +void chk_vos_fini(void); + +static inline bool +chk_is_ins_reset(struct chk_instance *ins, uint32_t flags) +{ + return flags & CHK__CHECK_FLAG__CF_RESET || ins->ci_start_flags & CSF_RESET_ALL; +} + +static inline void +chk_ins_set_fail(struct chk_instance *ins, uint32_t phase) +{ + if (ins->ci_slowest_fail_phase == CHK_INVAL_PHASE || ins->ci_slowest_fail_phase > phase) + ins->ci_slowest_fail_phase = phase; +} + +static inline bool +chk_rank_in_list(d_rank_list_t *rlist, d_rank_t rank) +{ + int i; + bool found = false; + + /* TBD: more efficiently search for the sorted ranks list. */ + + for (i = 0; i < rlist->rl_nr; i++) { + if (rlist->rl_ranks[i] == rank) { + found = true; + break; + } + } + + return found; +} + +static inline bool +chk_remove_rank_from_list(d_rank_list_t *rlist, d_rank_t rank) +{ + int i; + bool found = false; + + /* TBD: more efficiently search for the sorted ranks list. */ + + for (i = 0; i < rlist->rl_nr; i++) { + if (rlist->rl_ranks[i] == rank) { + found = true; + rlist->rl_nr--; + /* The leader rank will always be in the rank list. 
*/ + D_ASSERT(rlist->rl_nr > 0); + + if (i < rlist->rl_nr) + memmove(&rlist->rl_ranks[i], &rlist->rl_ranks[i + 1], + sizeof(rlist->rl_ranks[i]) * (rlist->rl_nr - i)); + break; + } + } + + return found; +} + +static inline void +chk_destroy_tree(daos_handle_t *toh, struct btr_root *root) +{ + int rc; + + if (daos_handle_is_valid(*toh)) { + rc = dbtree_destroy(*toh, NULL); + if (rc != 0) + D_ERROR("Failed to destroy the tree: "DF_RC"\n", DP_RC(rc)); + + /* + * Reset the tree even if failed to destroy, that may cause DRAM leak, + * but it will not prevent next check instance running. + */ + *toh = DAOS_HDL_INVAL; + memset(root, 0, sizeof(*root)); + } +} + +static inline void +chk_destroy_pending_tree(struct chk_instance *ins) +{ + ABT_rwlock_wrlock(ins->ci_abt_lock); + chk_destroy_tree(&ins->ci_pending_hdl, &ins->ci_pending_btr); + ABT_rwlock_unlock(ins->ci_abt_lock); +} + +static inline void +chk_destroy_pool_tree(struct chk_instance *ins) +{ + chk_destroy_tree(&ins->ci_pool_hdl, &ins->ci_pool_btr); +} + +static inline void +chk_query_free(struct chk_query_pool_shard *shards, uint32_t shard_nr) +{ + int i; + + if (shards != NULL) { + for (i = 0; i < shard_nr; i++) + D_FREE(shards[i].cqps_targets); + + D_FREE(shards); + } +} + +static inline void +chk_iv_ns_cleanup(struct ds_iv_ns **ns) +{ + if (*ns != NULL) { + if ((*ns)->iv_refcount == 1) + ds_iv_ns_cleanup(*ns); + ds_iv_ns_put(*ns); + *ns = NULL; + } +} + +static inline void +chk_pool_get(struct chk_pool_rec *cpr) +{ + cpr->cpr_refs++; + + D_DEBUG(DB_TRACE, "Get ref on pool rec %p for "DF_UUIDF", ref %d\n", + cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs); +} + +static inline void +chk_pool_put(struct chk_pool_rec *cpr) +{ + struct chk_pool_shard *cps; + int i; + + /* NOTE: Before being destroyed, keep it in the list. 
*/ + D_ASSERT(!d_list_empty(&cpr->cpr_link)); + + D_DEBUG(DB_TRACE, "Pet ref on pool rec %p for "DF_UUIDF", ref %d\n", + cpr, DP_UUID(cpr->cpr_uuid), cpr->cpr_refs); + + if (--(cpr->cpr_refs) == 0) { + d_list_del(&cpr->cpr_link); + D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL); + D_ASSERT(d_list_empty(&cpr->cpr_pending_list)); + D_ASSERT(d_list_empty(&cpr->cpr_shutdown_link)); + + while ((cps = d_list_pop_entry(&cpr->cpr_shard_list, struct chk_pool_shard, + cps_link)) != NULL) { + if (cps->cps_free_cb != NULL) + cps->cps_free_cb(cps->cps_data); + else + D_FREE(cps->cps_data); + D_FREE(cps); + } + D_FREE(cpr->cpr_clues.pcs_array); + + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + + if (!cpr->cpr_ins->ci_is_leader && cpr->cpr_mbs != NULL) { + for (i = 0; i < cpr->cpr_shard_nr; i++) + D_FREE(cpr->cpr_mbs[i].cpm_tgt_status); + } + + D_DEBUG(DB_TRACE, "Destroy pool rec %p for "DF_UUIDF"\n", + cpr, DP_UUID(cpr->cpr_uuid)); + + D_FREE(cpr->cpr_mbs); + D_FREE(cpr->cpr_label); + D_FREE(cpr); + } +} + +static inline void +chk_pool_shutdown(struct chk_pool_rec *cpr, bool locked) +{ + d_iov_t psid; + int rc; + + D_ASSERT(cpr->cpr_refs > 0); + + if (!locked) + ABT_mutex_lock(cpr->cpr_mutex); + + d_iov_set(&psid, cpr->cpr_uuid, sizeof(uuid_t)); + rc = ds_rsvc_stop(DS_RSVC_CLASS_POOL, &psid, RDB_NIL_TERM, false); + D_DEBUG(DB_MD, "Shutdown PS for "DF_UUIDF": "DF_RC"\n", + DP_UUID(cpr->cpr_uuid), DP_RC(rc)); + cpr->cpr_start_post = 0; + + ds_pool_stop(cpr->cpr_uuid); + cpr->cpr_started = 0; + + D_DEBUG(DB_MD, "Stop pool for "DF_UUIDF" with locked %s\n", + DP_UUID(cpr->cpr_uuid), locked ? 
"true" : "false"); + + if (!locked) + ABT_mutex_unlock(cpr->cpr_mutex); +} + +static inline bool +chk_pool_in_zombie(struct chk_pool_rec *cpr) +{ + struct chk_pool_shard *cps; + struct ds_pool_clue *clue; + bool found = false; + + d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) { + clue = cps->cps_data; + if (clue->pc_dir == DS_POOL_DIR_ZOMBIE) { + found = true; + break; + } + } + + return found; +} + +static inline int +chk_pools_add_from_dir(uuid_t uuid, void *args) +{ + struct chk_traverse_pools_args *ctpa = args; + + return chk_pool_start_one(ctpa->ctpa_ins, uuid, ctpa->ctpa_gen); +} + +static inline uint32_t +chk_pools_find_slowest(struct chk_instance *ins, int *done) +{ + struct chk_pool_rec *cpr; + uint32_t phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + + if (ins->ci_pool_stopped) + *done = -1; + else if (!ins->ci_is_leader && !ins->ci_orphan_done) + /* + * For check engine, if the check leader has not processed orphan pools, + * then we do not know whether there will be more pools to be scanned or + * not. So we cannot set @done under such case. + * + * For check leader, it needs to notify the check engines after orphan + * pools being processed (CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS). If the + * check leader failed to notify the check engines, related schedulers + * on those check engines will be blocked until the checker is stopped + * explicitly. 
+ */ + *done = 0; + else + *done = 1; + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (cpr->cpr_done || cpr->cpr_stop) + continue; + + *done = 0; + + if (cpr->cpr_bk.cb_phase < phase) + phase = cpr->cpr_bk.cb_phase; + } + + return phase; +} + +static inline int +chk_dup_string(char **tgt, const char *src, size_t len) +{ + int rc = 0; + + if (src == NULL) { + *tgt = NULL; + } else { + D_STRNDUP(*tgt, src, len); + if (*tgt == NULL) + rc = -DER_NOMEM; + } + + return rc; +} + +static inline void +chk_stop_sched(struct chk_instance *ins) +{ + uint64_t gen = ins->ci_bk.cb_gen; + + ins->ci_pause = 1; + ABT_mutex_lock(ins->ci_abt_mutex); + if (ins->ci_sched_running && !ins->ci_sched_exiting) { + D_ASSERT(ins->ci_sched != ABT_THREAD_NULL); + + D_INFO("Stopping %s instance on rank %u with gen "DF_U64"\n", + ins->ci_is_leader ? "leader" : "engine", dss_self_rank(), gen); + + ins->ci_sched_exiting = 1; + ABT_cond_broadcast(ins->ci_abt_cond); + ABT_mutex_unlock(ins->ci_abt_mutex); + ABT_thread_free(&ins->ci_sched); + } else { + ABT_mutex_unlock(ins->ci_abt_mutex); + /* Check ci_bk.cb_gen for the case of others restarted checker during my wait. */ + while ((ins->ci_sched_running || ins->ci_rejoining) && gen == ins->ci_bk.cb_gen) + dss_sleep(300); + } +} + +static inline int +chk_ins_can_start(struct chk_instance *ins) +{ + if (unlikely(!ins->ci_inited)) + return -DER_AGAIN; + + if (ins->ci_starting) + return -DER_INPROGRESS; + + if (ins->ci_stopping || ins->ci_sched_exiting) + return -DER_BUSY; + + if (ins->ci_sched_running) + return -DER_ALREADY; + + return 0; +} + +static inline void +chk_report_seq_init(struct chk_instance *ins) +{ + uint64_t myrank; + + if (ins->ci_is_leader) + myrank = CHK_LEADER_RANK; + else + myrank = dss_self_rank(); + + ins->ci_seq = (myrank << CHK_REPORT_RANK_BIT) | (d_hlc_get() >> (64 - CHK_REPORT_RANK_BIT)); + + /* Clear the highest bit. 
*/ + ins->ci_seq &= ~(1ULL << 63); +} + +static inline uint64_t +chk_report_seq_gen(struct chk_instance *ins) +{ + uint64_t seq = ins->ci_seq & CHK_REPORT_SEQ_MASK; + + seq++; + seq &= CHK_REPORT_SEQ_MASK; + ins->ci_seq = (ins->ci_seq & ~CHK_REPORT_SEQ_MASK) | seq; + + return ins->ci_seq; +} + +#endif /* __CHK_INTERNAL_H__ */ diff --git a/src/chk/chk_iv.c b/src/chk/chk_iv.c new file mode 100644 index 00000000000..eabef148a23 --- /dev/null +++ b/src/chk/chk_iv.c @@ -0,0 +1,234 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include +#include +#include +#include + +#include "chk_internal.h" + +static int +chk_iv_alloc_internal(d_sg_list_t *sgl) +{ + int rc = 0; + + rc = d_sgl_init(sgl, 1); + if (rc != 0) + goto out; + + D_ALLOC(sgl->sg_iovs[0].iov_buf, sizeof(struct chk_iv)); + if (sgl->sg_iovs[0].iov_buf == NULL) { + d_sgl_fini(sgl, true); + D_GOTO(out, rc = -DER_NOMEM); + } + + sgl->sg_iovs[0].iov_buf_len = sizeof(struct chk_iv); + sgl->sg_iovs[0].iov_len = sizeof(struct chk_iv); + +out: + return rc; +} + +static int +chk_iv_ent_init(struct ds_iv_key *iv_key, void *data, struct ds_iv_entry *entry) +{ + int rc; + + rc = chk_iv_alloc_internal(&entry->iv_value); + if (rc == 0) { + entry->iv_key.class_id = iv_key->class_id; + entry->iv_key.rank = iv_key->rank; + } + + return rc; +} + +static int +chk_iv_ent_get(struct ds_iv_entry *entry, void **priv) +{ + return 0; +} + +static void +chk_iv_ent_put(struct ds_iv_entry *entry, void *priv) +{ +} + +static int +chk_iv_ent_destroy(d_sg_list_t *sgl) +{ + d_sgl_fini(sgl, true); + + return 0; +} + +static int +chk_iv_ent_fetch(struct ds_iv_entry *entry, struct ds_iv_key *key, d_sg_list_t *dst, void **priv) +{ + D_ASSERT(0); + + return 0; +} + +/* Update the chk pool svc lists and status from engine to leader. 
*/ +static int +chk_iv_ent_update(struct ds_iv_entry *entry, struct ds_iv_key *key, + d_sg_list_t *src, void **priv) +{ + struct chk_iv *dst_iv = entry->iv_value.sg_iovs[0].iov_buf; + struct chk_iv *src_iv = src->sg_iovs[0].iov_buf; + int rc; + + if (src_iv->ci_rank == dss_self_rank()) { + if (src_iv->ci_to_leader) { + /* + * The case of the check engine sending IV message to the check leader + * on the same rank has already been handled via chk_iv_update(). Then + * only need to handle the case that the check leader resides on other + * rank (trigger RPC to the check leader - the IV parent via returning + * -DER_IVCB_FORWARD. + */ + D_ASSERTF(!chk_is_on_leader(src_iv->ci_gen, CHK_LEADER_RANK, false), + "Invalid IV forward path for gen "DF_X64"/"DF_X64", rank %u, " + "phase %u, status %d/%d, from_psl %s\n", + src_iv->ci_gen, src_iv->ci_seq, src_iv->ci_rank, src_iv->ci_phase, + src_iv->ci_ins_status, src_iv->ci_pool_status, + src_iv->ci_from_psl ? "yes" : "no"); + rc = -DER_IVCB_FORWARD; + } else { + /* + * If it is message to engine, then it may be triggered by check leader, + * but it also may be from the pool service leader to other pool shards. + * Return zero that will trigger IV_SYNC to other check engines. + * + * NOTE: Currently, IV refresh from root node is always direct to leaves, + * it does not need some internal nodes to forward. So here, if it + * is not for PS leader notification, then it must be triggered by + * the check leader. + */ + if (!src_iv->ci_from_psl) + D_ASSERTF(chk_is_on_leader(src_iv->ci_gen, CHK_LEADER_RANK, false), + "Invalid IV forward path for gen "DF_X64"/"DF_X64 + ", rank %u, phase %u, status %d/%d\n", src_iv->ci_gen, + src_iv->ci_seq, src_iv->ci_rank, src_iv->ci_phase, + src_iv->ci_ins_status, src_iv->ci_pool_status); + rc = 0; + } + } else { + /* + * We got an IV SYNC (refresh) RPC from some engine. But because the engine + * always set CRT_IV_SHORTCUT_TO_ROOT for sync, then this should not happen. 
+ */ + D_ASSERTF(src_iv->ci_to_leader, + "Got invalid IV SYNC with gen "DF_X64"/"DF_X64", rank %u, phase %u, " + "status %d/%d, to_leader no, from_psl %s\n", + src_iv->ci_gen, src_iv->ci_seq, src_iv->ci_rank, src_iv->ci_phase, + src_iv->ci_ins_status, src_iv->ci_pool_status, + src_iv->ci_from_psl ? "yes" : "no"); + *dst_iv = *src_iv; + rc = chk_leader_notify(dst_iv); + } + + return rc; +} + +/* Refresh the chk status from leader to engines. */ +static int +chk_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key, + d_sg_list_t *src, int ref_rc, void **priv) +{ + struct chk_iv *dst_iv = entry->iv_value.sg_iovs[0].iov_buf; + struct chk_iv *src_iv = src->sg_iovs[0].iov_buf; + int rc = 0; + + /* + * For the notification from pool service leader, skip the local pool shard that will + * be handled by the pool service leader (including the @cpr status and pool service). + * + * For the notification from the check leader to check engines, do not skip the local + * check engine. + */ + if (!src_iv->ci_to_leader && (src_iv->ci_rank != dss_self_rank() || !src_iv->ci_from_psl)) { + *dst_iv = *src_iv; + rc = chk_engine_notify(dst_iv); + } + + return rc; +} + +static int +chk_iv_value_alloc(struct ds_iv_entry *entry, struct ds_iv_key *key, d_sg_list_t *sgl) +{ + return chk_iv_alloc_internal(sgl); +} + +struct ds_iv_class_ops chk_iv_ops = { + .ivc_ent_init = chk_iv_ent_init, + .ivc_ent_get = chk_iv_ent_get, + .ivc_ent_put = chk_iv_ent_put, + .ivc_ent_destroy = chk_iv_ent_destroy, + .ivc_ent_fetch = chk_iv_ent_fetch, + .ivc_ent_update = chk_iv_ent_update, + .ivc_ent_refresh = chk_iv_ent_refresh, + .ivc_value_alloc = chk_iv_value_alloc, +}; + +int +chk_iv_update(void *ns, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode, bool retry) +{ + d_sg_list_t sgl; + d_iov_t iov; + struct ds_iv_key key; + int rc; + + iv->ci_rank = dss_self_rank(); + iv->ci_seq = d_hlc_get(); + + if (chk_is_on_leader(iv->ci_gen, CHK_LEADER_RANK, false) && iv->ci_to_leader) { + /* + * It is 
the check engine sends IV message to the check leader on + * the same rank. Then directly notify the check leader without RPC. + */ + rc = chk_leader_notify(iv); + } else { + iov.iov_buf = iv; + iov.iov_len = sizeof(*iv); + iov.iov_buf_len = sizeof(*iv); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + + memset(&key, 0, sizeof(key)); + key.class_id = IV_CHK; + rc = ds_iv_update(ns, &key, &sgl, shortcut, sync_mode, 0, retry); + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "CHK iv "DF_X64"/"DF_X64" on rank %u, phase %u, ins_status %u, " + "pool_status %u, to_leader %s, from_psl %s, destroyed %s: rc = %d\n", + iv->ci_gen, iv->ci_seq, iv->ci_rank, iv->ci_phase, iv->ci_ins_status, + iv->ci_pool_status, iv->ci_to_leader ? "yes" : "no", + iv->ci_from_psl ? "yes" : "no", iv->ci_pool_destroyed ? "yes" : "no", rc); + + return rc; +} + +int +chk_iv_init(void) +{ + return ds_iv_class_register(IV_CHK, &iv_cache_ops, &chk_iv_ops); +} + +int +chk_iv_fini(void) +{ + return ds_iv_class_unregister(IV_CHK); +} diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c new file mode 100644 index 00000000000..c9e91752ed1 --- /dev/null +++ b/src/chk/chk_leader.c @@ -0,0 +1,3916 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "chk.pb-c.h" +#include "chk_internal.h" + +#define DF_LEADER "Check leader (gen: "DF_X64")" +#define DP_LEADER(ins) (ins)->ci_bk.cb_gen + +static struct chk_instance *chk_leader; + +struct chk_query_args { + struct chk_instance *cqa_ins; + struct btr_root cqa_btr; + daos_handle_t cqa_hdl; + d_list_t cqa_list; + uint32_t cqa_count; + uint32_t cqa_ins_status; + uint32_t cqa_ins_phase; + uint64_t cqa_gen; +}; + +struct chk_rank_rec { + /* Link into chk_instance::ci_rank_list. */ + d_list_t crr_link; + /* The list of chk_pending_rec. 
*/ + d_list_t crr_pending_list; + d_rank_t crr_rank; + uint32_t crr_phase; + struct chk_instance *crr_ins; +}; + +struct chk_rank_bundle { + d_rank_t crb_rank; + uint32_t crb_phase; + struct chk_instance *crb_ins; +}; + +static int +chk_rank_hkey_size(void) +{ + return sizeof(d_rank_t); +} + +static void +chk_rank_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey) +{ + D_ASSERT(key_iov->iov_len == sizeof(d_rank_t)); + + memcpy(hkey, key_iov->iov_buf, key_iov->iov_len); +} + +static int +chk_rank_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, + struct btr_record *rec, d_iov_t *val_out) +{ + struct chk_rank_bundle *crb = val_iov->iov_buf; + struct chk_rank_rec *crr; + int rc = 0; + + D_ASSERT(crb != NULL); + + D_ALLOC_PTR(crr); + if (crr == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + D_INIT_LIST_HEAD(&crr->crr_pending_list); + crr->crr_rank = crb->crb_rank; + crr->crr_phase = crb->crb_phase; + crr->crr_ins = crb->crb_ins; + + rec->rec_off = umem_ptr2off(&tins->ti_umm, crr); + d_list_add_tail(&crr->crr_link, &crb->crb_ins->ci_rank_list); + +out: + return rc; +} + +static int +chk_rank_free(struct btr_instance *tins, struct btr_record *rec, void *args) +{ + d_iov_t *val_iov = args; + struct chk_rank_rec *crr; + + crr = (struct chk_rank_rec *)umem_off2ptr(&tins->ti_umm, rec->rec_off); + rec->rec_off = UMOFF_NULL; + d_list_del_init(&crr->crr_link); + + if (val_iov != NULL) { + d_iov_set(val_iov, crr, sizeof(*crr)); + } else { + /* + * This only happens when destroy the rank tree. At that time, + * the pending records tree has already been destroyed. 
+ */ + D_ASSERT(d_list_empty(&crr->crr_pending_list)); + D_FREE(crr); + } + + return 0; +} + +static int +chk_rank_fetch(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key_iov, d_iov_t *val_iov) +{ + struct chk_rank_rec *crr; + + D_ASSERT(val_iov != NULL); + + crr = umem_off2ptr(&tins->ti_umm, rec->rec_off); + d_iov_set(val_iov, crr, sizeof(*crr)); + + return 0; +} + +static int +chk_rank_update(struct btr_instance *tins, struct btr_record *rec, + d_iov_t *key, d_iov_t *val, d_iov_t *val_out) +{ + struct chk_rank_bundle *crb = val->iov_buf; + struct chk_rank_rec *crr; + + crr = (struct chk_rank_rec *)umem_off2ptr(&tins->ti_umm, rec->rec_off); + crr->crr_phase = crb->crb_phase; + + return 0; +} + +btr_ops_t chk_rank_ops = { + .to_hkey_size = chk_rank_hkey_size, + .to_hkey_gen = chk_rank_hkey_gen, + .to_rec_alloc = chk_rank_alloc, + .to_rec_free = chk_rank_free, + .to_rec_fetch = chk_rank_fetch, + .to_rec_update = chk_rank_update, +}; + +bool +chk_is_on_leader(uint64_t gen, d_rank_t leader, bool known_leader) +{ + D_ASSERTF(gen != 0, "Invalid gen "DF_X64"\n", gen); + + if (!known_leader) + leader = chk_leader->ci_prop.cp_leader; + + return chk_leader->ci_bk.cb_gen == gen && leader == dss_self_rank(); +} + +struct ds_iv_ns * +chk_leader_get_iv_ns(void) +{ + struct chk_instance *ins = chk_leader; + struct ds_iv_ns *ns = ins->ci_iv_ns; + + if (ns != NULL) + ds_iv_ns_get(ns); + + return ns; +} + +static int +chk_rank_del(struct chk_instance *ins, d_rank_t rank) +{ + struct chk_rank_rec *crr; + struct chk_pending_rec *cpr; + struct chk_pending_rec *tmp; + d_iov_t riov; + d_iov_t kiov; + int rc; + int rc1; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &rank, sizeof(rank)); + rc = dbtree_delete(ins->ci_rank_hdl, BTR_PROBE_EQ, &kiov, &riov); + if (rc != 0) + D_GOTO(out, rc = ((rc == -DER_NONEXIST || rc == -DER_NO_HDL) ? 
0 : rc)); + + crr = (struct chk_rank_rec *)riov.iov_buf; + if (d_list_empty(&crr->crr_pending_list)) + goto out; + + /* Cleanup all pending records belonging to this rank. */ + ABT_rwlock_wrlock(ins->ci_abt_lock); + d_list_for_each_entry_safe(cpr, tmp, &crr->crr_pending_list, cpr_rank_link) { + rc1 = chk_pending_wakeup(ins, cpr); + if (rc1 != 0 && rc == 0) + rc = rc1; + } + ABT_rwlock_unlock(ins->ci_abt_lock); + +out: + return rc; +} + +static inline void +chk_leader_destroy_trees(struct chk_instance *ins) +{ + /* + * Because the pending record is attached to some rank record, then destroy + * the pending records tree before destroying the rank records tree. + */ + chk_destroy_pending_tree(ins); + chk_destroy_tree(&ins->ci_rank_hdl, &ins->ci_rank_btr); + chk_destroy_pool_tree(ins); +} + +static void +chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_status, + uint32_t pool_status, bool bcast) +{ + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_iv iv = { 0 }; + int rc = 0; + + ins->ci_sched_exiting = 1; + + D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list)); + + chk_pool_stop_all(ins, pool_status, NULL); + + if ((bcast && ins_status == CHK__CHECK_INST_STATUS__CIS_FAILED) || + ins_status == CHK__CHECK_INST_STATUS__CIS_IMPLICATED || + unlikely(ins_status == CHK__CHECK_INST_STATUS__CIS_COMPLETED && !ins->ci_orphan_done)) { + iv.ci_gen = cbk->cb_gen; + iv.ci_phase = ins_phase != CHK_INVAL_PHASE ? ins_phase : cbk->cb_phase; + iv.ci_ins_status = ins_status; + + /* Synchronously notify the engines that the check leader exit. 
*/ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, + CRT_IV_SYNC_EAGER, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" notify the engines its exit, status %u: rc = %d\n", + DP_LEADER(ins), ins_status, rc); + } + + chk_leader_destroy_trees(ins); + + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + cbk->cb_ins_status = ins_status; + if (ins_phase != CHK_INVAL_PHASE) + cbk->cb_phase = ins_phase; + cbk->cb_time.ct_stop_time = time(NULL); + rc = chk_bk_update_leader(cbk); + if (rc != 0) + D_ERROR(DF_LEADER" exit with status %u: "DF_RC"\n", + DP_LEADER(ins), ins_status, DP_RC(rc)); + } + + ins->ci_sched_exiting = 0; +} + +static void +chk_leader_post_repair(struct chk_instance *ins, struct chk_pool_rec *cpr, + int *result, bool update, bool notify) +{ + struct chk_bookmark *cbk = &cpr->cpr_bk; + char uuid_str[DAOS_UUID_STR_SIZE]; + struct chk_iv iv = { 0 }; + int rc; + + D_ASSERT(cpr != NULL); + + if (unlikely(*result > 0)) + *result = 0; + + if (*result != 0) { + chk_ins_set_fail(ins, cpr->cpr_bk.cb_phase); + if (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) + cpr->cpr_skip = 1; + } + + if (cpr->cpr_skip || cpr->cpr_destroyed) { + if (*result != 0) { + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; + } else if (cpr->cpr_destroyed) { + /* Since the pool is destroyed, then mark its phase as DONE. */ + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + } else { + cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } + cbk->cb_time.ct_stop_time = time(NULL); + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0) + D_WARN("Failed to update pool (" DF_UUID ") bookmark after repair: %d\n", + DP_UUID(cpr->cpr_uuid), rc); + } + + /* + * If the operation failed and 'failout' is set, then do nothing here. + * chk_leader_exit will handle all the IV and bookmark related things. 
+ */ + if (*result == 0 || !(ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) { + if (notify) { + iv.ci_gen = cbk->cb_gen; + uuid_copy(iv.ci_uuid, cpr->cpr_uuid); + iv.ci_ins_status = ins->ci_bk.cb_ins_status; + iv.ci_phase = cbk->cb_phase; + iv.ci_pool_status = cbk->cb_pool_status; + if (cpr->cpr_destroyed) + iv.ci_pool_destroyed = 1; + + /* Synchronously notify the engines that check on the pool got failure. */ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, + CRT_IV_SYNC_EAGER, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" notify the engines that the check for pool " + DF_UUIDF" is done with status %u: rc = %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status, rc); + if (rc == 0) + cpr->cpr_notified_exit = 1; + } + + *result = 0; + } + + if (update) { + rc = chk_bk_update_leader(&ins->ci_bk); + if (rc != 0) + D_WARN("Cannot update leader bookmark after repair: "DF_RC"\n", DP_RC(rc)); + } +} + +static d_rank_list_t * +chk_leader_cpr2ranklist(struct chk_pool_rec *cpr, bool svc) +{ + struct chk_pool_shard *cps; + struct ds_pool_clue *clue; + d_rank_list_t *ranks; + int i = 0; + + ranks = d_rank_list_alloc(cpr->cpr_shard_nr); + if (ranks != NULL) { + d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) { + if (svc) { + clue = cps->cps_data; + if (clue == NULL || clue->pc_rc <= 0 || clue->pc_svc_clue == NULL) + continue; + } + ranks->rl_ranks[i++] = cps->cps_rank; + } + + /* There is at least one valid rank. */ + D_ASSERT(i > 0); + + /* Reset the rl_nr according to the valid ranks. */ + ranks->rl_nr = i; + } + + return ranks; +} + +static int +chk_leader_destroy_pool(struct chk_pool_rec *cpr, uint64_t seq, bool dereg) +{ + d_rank_list_t *ranks = NULL; + int rc = 0; + + /* + * Firstly, deregister from MS. If it is successful but we failed to destroy + * related pool target(s) in subsequent steps, then the pool becomes orphan. + * It may cause some space leak, but will not cause correctness issue. 
That + * will be handled when run DAOS check next time. + */ + if (dereg) { + rc = ds_chk_deregpool_upcall(seq, cpr->cpr_uuid); + if (rc != 0 && rc != -DER_NONEXIST) + goto out; + } + + ranks = chk_leader_cpr2ranklist(cpr, false); + if (ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ds_mgmt_tgt_pool_destroy_ranks(cpr->cpr_uuid, ranks); + if (rc == -DER_NONEXIST) + rc = 0; + if (rc == 0) + cpr->cpr_destroyed = 1; + d_rank_list_free(ranks); + +out: + return rc; +} + +static void +chk_leader_fail_pool(struct chk_pool_rec *cpr, int result) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + uint64_t seq; + int rc; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN; + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + seq = chk_report_seq_gen(ins); + cbk->cb_statistics.cs_ignored++; + cpr->cpr_skip = 1; + + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = act; + cru.cru_rank = dss_self_rank(); + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + cru.cru_msg = "Some engine failed to report information for pool.\n"; + cru.cru_result = result; + + rc = chk_leader_report(&cru, &seq, NULL); + + D_WARN(DF_LEADER" some engine failed to report information for pool " + DF_UUIDF", action %u, seq "DF_X64", remote_rc %d, report_rc %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), act, seq, result, rc); +} + +/* + * NOTE: Initialize and construct clues_out from cpr. The caller is responsible for freeing + * clues->pcs_array with D_FREE, but the borrowed clues->pcs_array->pc_svc_clue must be kept. 
+ */
+/*
+ * Collect the pool service clues reported for @cpr into cpr->cpr_clues.
+ *
+ * Each shard on cpr->cpr_shard_list whose clue has pc_rc > 0 and a non-NULL
+ * pc_svc_clue is shallow-copied into a heap array that starts with four slots
+ * and doubles whenever it is full. On success the array (with its length and
+ * capacity) is stored into cpr->cpr_clues. On every other exit the local array
+ * is released before leaving, so it is never leaked.
+ *
+ * The scan stops at the first shard that has no clue, has pc_tgt_nr < 0, or
+ * reports pc_rc == -DER_BUSY; such a pool is reported via chk_leader_fail_pool().
+ *
+ * Returns zero on success. When rc is non-zero at exit, the pool is marked as
+ * skipped and rc is handed by pointer to chk_leader_post_repair(), so the value
+ * returned is whatever that call leaves in it.
+ */
+static int
+chk_leader_build_pool_clues(struct chk_pool_rec *cpr)
+{
+	struct chk_instance	*ins = cpr->cpr_ins;
+	struct chk_pool_shard	*cps;
+	struct ds_pool_clue	*clue;
+	struct ds_pool_clues	 clues;
+	int			 rc = 0;
+	bool			 update = false;
+
+	clues.pcs_cap = 4;
+	clues.pcs_len = 0;
+
+	D_ALLOC_ARRAY(clues.pcs_array, clues.pcs_cap);
+	if (clues.pcs_array == NULL)
+		D_GOTO(out, rc = -DER_NOMEM);
+
+	d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) {
+		clue = cps->cps_data;
+
+		/* Related engine failed to report the pool shard(s), have to skip it. */
+		if (clue == NULL || clue->pc_tgt_nr < 0) {
+			if (clue == NULL)
+				rc = -DER_NOMEM;
+			else
+				rc = clue->pc_rc;
+			chk_leader_fail_pool(cpr, rc);
+
+			/* The array is not handed over to @cpr on this path, release it. */
+			D_FREE(clues.pcs_array);
+			D_GOTO(out, update = true);
+		}
+
+		/*
+		 * Related engine failed to report PS because of PS shutdown trouble, skip it.
+		 *
+		 * NOTE(review): rc has not been assigned since its initialization, so the
+		 * report carries result 0 and the function returns 0 without running the
+		 * error handling under 'out'. Confirm whether clue->pc_rc was intended.
+		 */
+		if (clue->pc_rc == -DER_BUSY) {
+			chk_leader_fail_pool(cpr, rc);
+
+			/* The array is not handed over to @cpr on this path, release it. */
+			D_FREE(clues.pcs_array);
+			D_GOTO(out, update = true);
+		}
+
+		/* Only shards that carry a pool service clue are collected. */
+		if (clue->pc_rc <= 0 || clue->pc_svc_clue == NULL)
+			continue;
+
+		if (clues.pcs_len == clues.pcs_cap) {
+			/* 'clue' temporarily receives the reallocated array. */
+			D_REALLOC_ARRAY(clue, clues.pcs_array, clues.pcs_cap, clues.pcs_cap << 1);
+			if (clue == NULL) {
+				D_FREE(clues.pcs_array);
+				D_GOTO(out, rc = -DER_NOMEM);
+			}
+
+			clues.pcs_array = clue;
+			clues.pcs_cap <<= 1;
+			/* Point 'clue' back at the current shard's clue before copying. */
+			clue = cps->cps_data;
+		}
+
+		memcpy(&clues.pcs_array[clues.pcs_len++], clue, sizeof(*clue));
+	}
+
+	/* Success: ownership of the array moves to @cpr. */
+	memcpy(&cpr->cpr_clues, &clues, sizeof(cpr->cpr_clues));
+
+out:
+	if (rc != 0) {
+		D_ERROR(DF_LEADER" failed to build pool service clues for "DF_UUIDF": "DF_RC"\n",
+			DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), DP_RC(rc));
+		/*
+		 * We do not know whether the pool is inconsistent or not. But since we cannot
+		 * parse the pool clues, then have to skip it. Notify the check engines.
+		 */
+		cpr->cpr_skip = 1;
+		chk_leader_post_repair(ins, cpr, &rc, update, true);
+	}
+
+	return rc;
+}
+
+/* Only keep the chosen PS replica, destroy all others.
*/ +static int +chk_leader_reset_pool_svc(struct chk_pool_rec *cpr) +{ + struct ds_pool_clues *clues = &cpr->cpr_clues; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &ins->ci_bk; + d_rank_t *ranks; + d_rank_list_t rank_list; + d_iov_t psid; + int chosen = cpr->cpr_advice; + int i; + int j; + int rc; + + D_ASSERT(chosen >= 0 && clues->pcs_len > chosen); + + /* If the chosen one is the unique PS replica, then do nothing. */ + if (clues->pcs_len == 1) + D_GOTO(out, rc = 0); + + D_ALLOC_ARRAY(ranks, clues->pcs_len - 1); + if (ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + /* Build a list of all ranks except for the chosen one. */ + for (i = 0, j = 0; i < clues->pcs_len; i++) { + if (i != chosen) + ranks[j++] = clues->pcs_array[i].pc_rank; + } + + rank_list.rl_ranks = ranks; + rank_list.rl_nr = j; + + d_iov_set(&psid, cpr->cpr_uuid, sizeof(uuid_t)); + rc = ds_rsvc_dist_stop(DS_RSVC_CLASS_POOL, &psid, &rank_list, NULL /* excluded */, + RDB_NIL_TERM, true /* destroy */); + D_FREE(ranks); + +out: + if (rc != 0) { + D_ERROR(DF_LEADER" failed to destroy other pool service replicas for "DF_UUIDF + ": "DF_RC"\n", DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), DP_RC(rc)); + + cbk->cb_statistics.cs_failed++; + cpr->cpr_skip = 1; + } + + return rc; +} + +static int +chk_leader_dangling_pool(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_report_unit cru = { 0 }; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_ENGINE; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + switch (act) { + case 
CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * Default action is to de-register the dangling pool from MS. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + /* Fall through. */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_chk_deregpool_upcall(seq, cpr->cpr_uuid); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. */ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + } else { + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 2; + + strs[0] = "Discard the dangling pool entry from MS [suggested]."; + strs[1] = "Keep the dangling pool entry on MS, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + } + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? 
CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + cru.cru_msg = "Check leader detects dangling pool.\n"; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_leader_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" detects dangling pool "DF_UUIDF", action %u (%s), seq " + DF_X64", handle_rc %d, report_rc %d, decision %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), + act, option_nr ? "need interact" : "no interact", seq, result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + D_ERROR(DF_LEADER" got invalid decision %d for dangling pool " + DF_UUIDF" with seq "DF_X64". Ignore the inconsistency.\n", + DP_LEADER(ins), decision, DP_UUID(cpr->cpr_uuid), seq); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. 
+ */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_chk_deregpool_upcall(seq, cpr->cpr_uuid); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + } + + goto report; + +out: + chk_leader_post_repair(ins, cpr, &result, rc <= 0, false); + + return result; +} + +static int +chk_leader_orphan_pool(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + struct ds_pool_clue *clue = cpr->cpr_clue; + char *strs[3]; + d_iov_t iovs[3]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + uint64_t seq = 0; + uint32_t options[3]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + D_ASSERT(cpr->cpr_advice >= 0); + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_POOL_NONEXIST_ON_MS; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + /* For orphan pool, do not export pool service until being registered to MS successfully. */ + cpr->cpr_not_export_ps = 1; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * If the pool service still can start, then the default action is to register + * the orphan pool to MS; otherwise, it is suggested to destroy the orphan pool. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_READD: + /* Fall through. 
*/ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + /* + * If some pool shard is in zombie directory, then it is quite possible that the + * pool was in destroying before the corruption. It is suggested to continue the + * destroying of the orphan pool. + */ + if (chk_pool_in_zombie(cpr)) + goto interact; + + act = CHK__CHECK_INCONSIST_ACTION__CIA_READD; + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + cpr->cpr_exist_on_ms = 1; + } else { + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, clue->pc_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + } else { + cbk->cb_statistics.cs_repaired++; + cpr->cpr_exist_on_ms = 1; + cpr->cpr_not_export_ps = 0; + } + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + /* Fall through. */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = chk_leader_destroy_pool(cpr, seq, false); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + /* + * If want to destroy the orphan pool, then skip subsequent check in spite of + * whether it is destroyed successfully or not. + */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + +interact: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + } else { + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_READD; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[2] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 3; + + strs[0] = "Re-add the orphan pool back to MS [suggested]."; + strs[1] = "Destroy the orphan pool to release space."; + strs[2] = "Keep the orphan pool entry on engines, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + d_iov_set(&iovs[2], strs[2], strlen(strs[2])); + + sgl.sg_nr = 3; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + } + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = clue->pc_label; + cru.cru_msg = "Check leader detects orphan pool.\n"; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_leader_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" detects orphan pool "DF_UUIDF", action %u (%s), seq " + DF_X64", advice %d, handle_rc %d, report_rc %d, decision %d\n", DP_LEADER(ins), + DP_UUID(cpr->cpr_uuid), act, option_nr ? "need interact" : "no interact", + seq, cpr->cpr_advice, result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + +ignore: + D_ERROR(DF_LEADER" got invalid decision %d for orphan pool " + DF_UUIDF" with seq "DF_X64". 
Ignore the inconsistency.\n", + DP_LEADER(ins), decision, DP_UUID(cpr->cpr_uuid), seq); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = chk_leader_destroy_pool(cpr, seq, false); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + /* + * If want to destroy the orphan pool, then skip subsequent check in spite of + * whether it is destroyed successfully or not. + */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_READD: + /* NOTE: currently, we do not support to register the in-destroying pool to MS. */ + if (chk_pool_in_zombie(cpr)) + goto ignore; + + act = CHK__CHECK_INCONSIST_ACTION__CIA_READD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + cpr->cpr_exist_on_ms = 1; + } else { + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, clue->pc_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + } else { + cbk->cb_statistics.cs_repaired++; + cpr->cpr_exist_on_ms = 1; + cpr->cpr_not_export_ps = 0; + } + } + break; + } + + goto report; + +out: + /* + * If the orphan pool is ignored (in spite of because it is required or failed + * to fix related inconsistency), then notify check engines to remove related + * pool record and bookmark. + */ + chk_leader_post_repair(ins, cpr, &result, rc <= 0, cpr->cpr_skip ? 
true : false); + + return result; +} + +static int +chk_leader_no_quorum_pool(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + struct ds_pool_clue *clue; + char *strs[3]; + char suggested[CHK_MSG_BUFLEN] = { 0 }; + d_iov_t iovs[3]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + uint64_t seq = 0; + uint32_t options[3]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_POOL_LESS_SVC_WITHOUT_QUORUM; + act = prop->cp_policies[cla]; + cbk->cb_statistics.cs_total++; + + if (cpr->cpr_advice < 0) { + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * Destroy the corrupted pool by default. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = chk_leader_destroy_pool(cpr, seq, true); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + /* + * If want to destroy the pool, then skip subsequent check in spite of + * whether it is destroyed successfully or not. + */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + /* If ignore the corrupted pool, then skip subsequent check. */ + cpr->cpr_skip = 1; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. 
+ */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. */ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + cpr->cpr_skip = 1; + break; + } + + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 2; + + strs[0] = "Destroy the corrupted pool from related engines [suggested]."; + strs[1] = "Keep the corrupted pool on related engines, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + break; + } + } else { + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * If we can start some PS under DICTATE mode, then do it by default. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + /* + * Under dryrun mode, we cannot start the PS with DICTATE + * mode, then have to skip it. + */ + cpr->cpr_skip = 1; + goto report; + } + + result = chk_leader_reset_pool_svc(cpr); + if (result != 0 || cpr->cpr_exist_on_ms) + goto report; + + clue = cpr->cpr_clue; + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, cpr->cpr_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + /* Skip the pool if failed to register to MS. */ + cpr->cpr_skip = 1; + } + /* + * NOTE: For result == 0 case, it still cannot be regarded as repaired. + * We need to start the PS under DICTATE mode in subsequent step. 
+ */ + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + seq = chk_report_seq_gen(ins); + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = chk_leader_destroy_pool(cpr, seq, true); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + /* + * If want to destroy the pool, then skip subsequent check in spite of + * whether it is destroyed successfully or not. + */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + /* If ignore the corrupted pool, then skip subsequent check. */ + cpr->cpr_skip = 1; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. 
*/ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + cpr->cpr_skip = 1; + break; + } + + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + options[2] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + option_nr = 3; + + clue = cpr->cpr_clue; + snprintf(suggested, CHK_MSG_BUFLEN - 1, + "Start pool service under DICTATE mode from rank %d [suggested].", + clue->pc_rank); + strs[0] = suggested; + strs[1] = "Destroy the corrupted pool from related engines."; + strs[2] = "Keep the corrupted pool on related engines, repair nothing."; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + d_iov_set(&iovs[2], strs[2], strlen(strs[2])); + + sgl.sg_nr = 3; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + break; + } + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = cpr->cpr_label; + cru.cru_msg = "Check leader detects corrupted pool without quorum.\n"; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_leader_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" detects corrupted pool "DF_UUIDF", action %u (%s), seq " + DF_X64", advice %d, handle_rc %d, report_rc %d, decision %d\n", DP_LEADER(ins), + DP_UUID(cpr->cpr_uuid), act, option_nr ? "need interact" : "no interact", + seq, cpr->cpr_advice, result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_failed++; + /* Skip the corrupted pool if failed to interact with admin for further action. 
*/ + cpr->cpr_skip = 1; + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) + goto out; + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + +ignore: + D_ERROR(DF_LEADER" got invalid decision %d for corrupted pool " + DF_UUIDF" with seq "DF_X64". Ignore the inconsistency.\n", + DP_LEADER(ins), decision, DP_UUID(cpr->cpr_uuid), seq); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + /* If ignore the corrupted pool, then skip subsequent check. */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD: + act = CHK__CHECK_INCONSIST_ACTION__CIA_DISCARD; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = chk_leader_destroy_pool(cpr, seq, true); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + /* + * If want to destroy the corrupted pool, then skip subsequent check in spite of + * whether it is destroyed successfully or not. + */ + cpr->cpr_skip = 1; + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + if (unlikely(cpr->cpr_advice < 0)) + goto ignore; + + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + /* + * Under dryrun mode, we cannot start the PS with DICTATE + * mode, then have to skip it. + */ + cpr->cpr_skip = 1; + break; + } + + result = chk_leader_reset_pool_svc(cpr); + if (result != 0 || cpr->cpr_exist_on_ms) + break; + + clue = cpr->cpr_clue; + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, cpr->cpr_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + /* Skip the pool if failed to register to MS. 
*/ + cpr->cpr_skip = 1; + } + /* + * NOTE: For result == 0 case, it still cannot be regarded as repaired. + * We need to start the PS under DICTATE mode in subsequent step. + */ + break; + } + + goto report; + +out: + /* + * If the corrupted pool is ignored (in spite of because it is required or failed + * to fix related inconsistency), then notify check engines to remove related + * pool record and bookmark. + */ + chk_leader_post_repair(ins, cpr, &result, rc <= 0, cpr->cpr_skip ? true : false); + + return result; +} + +/* + * Whether need to stop current check instance or not. + * + * \param ins [IN] The leader instance. + * \param ret [OUT] When return true, set it as 1 if the checker is completed, + * set it as 0 if someone wants to stop the checker. + */ +static int +chk_leader_need_stop(struct chk_instance *ins, int *ret) +{ + struct chk_pool_rec *cpr; + bool dangling = false; + + if (d_list_empty(&ins->ci_rank_list)) { + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (!cpr->cpr_skip && !cpr->cpr_done && cpr->cpr_dangling) { + dangling = true; + break; + } + } + + if (!dangling) { + /* + * "ci_stopping" means that the user wants to stop checker for some pools. + * But the specified pools may be not in checking. "ci_pool_stopped" means + * the checker for some pools are really stopped. + */ + if (ins->ci_pool_stopped) { + D_ASSERT(ins->ci_stopping); + *ret = 0; + return true; + } + + *ret = 1; + return true; + } + } + + if (!ins->ci_sched_running || ins->ci_sched_exiting) { + *ret = 0; + return true; + } + + return false; +} + +static inline bool +chk_leader_pool_need_stop(struct chk_pool_rec *cpr, int *ret) +{ + if (*ret < 0 || cpr->cpr_skip || cpr->cpr_stop) + return true; + + return chk_leader_need_stop(cpr->cpr_ins, ret); +} + +/* + * Collect pool svc clues, and try to choose the available replica. + * After the process, if the pool has no PS replica available, then + * it is either destroyed or skipped for subsequent check. 
+ */ +static int +chk_leader_handle_pool_clues(struct chk_pool_rec *cpr) +{ + struct ds_pool_clues *clues; + int rc; + + rc = chk_leader_build_pool_clues(cpr); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + + clues = &cpr->cpr_clues; + D_ASSERTF(clues->pcs_len >= 0, "Got invalid clues: %d\n", clues->pcs_len); + + if (clues->pcs_len > 0) { + rc = ds_pool_check_svc_clues(clues, &cpr->cpr_advice); + cpr->cpr_clue = &clues->pcs_array[cpr->cpr_advice]; + if (rc == 0) { + cpr->cpr_healthy = 1; + goto out; + } + } else { + /* No pool service. */ + cpr->cpr_advice = -1; + } + + rc = chk_leader_no_quorum_pool(cpr); + +out: + return rc; +} + +static int +chk_leader_start_pool_svc(struct chk_pool_rec *cpr) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &ins->ci_bk; + struct ds_pool_clue *clue; + d_rank_list_t *ranks = NULL; + d_iov_t psid; + int rc = 0; + + D_ASSERT(cpr->cpr_advice >= 0); + + d_iov_set(&psid, cpr->cpr_uuid, sizeof(uuid_t)); + if (cpr->cpr_healthy) { + /* + * If the pool has quorum for pool service, then even if some replicas are lost, + * it is still not regarded as 'inconsistency'. The raft mechanism will recover + * the other pool service replicas automatically. + */ + ranks = chk_leader_cpr2ranklist(cpr, true); + if (ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + /* + * We cannot start the pool service via regular quorum, but we can start it under + * DS_RSVC_DICTATE mode. + */ + ranks = d_rank_list_alloc(1); + if (ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + clue = cpr->cpr_clue; + ranks->rl_ranks[0] = clue->pc_rank; + } + + rc = ds_rsvc_dist_start(DS_RSVC_CLASS_POOL, &psid, cpr->cpr_uuid, ranks, RDB_NIL_TERM, + cpr->cpr_healthy ? DS_RSVC_START : DS_RSVC_DICTATE, + false /* bootstrap */, 0 /* size */); + +out: + d_rank_list_free(ranks); + if (rc != 0) { + D_ERROR(DF_LEADER" failed to start pool service (%s) for "DF_UUIDF" at replica %d, " + "skip it: "DF_RC"\n", + DP_LEADER(ins), cpr->cpr_healthy ? 
"healthy" : "unhealthy", + DP_UUID(cpr->cpr_uuid), cpr->cpr_advice, DP_RC(rc)); + + cpr->cpr_skip = 1; + if (!cpr->cpr_healthy) + cbk->cb_statistics.cs_failed++; + chk_leader_post_repair(ins, cpr, &rc, !cpr->cpr_healthy, true); + } else if (!cpr->cpr_healthy) { + cbk->cb_statistics.cs_repaired++; + chk_leader_post_repair(ins, cpr, &rc, true, false); + } + + return rc; +} + +static int +chk_leader_handle_pool_label(struct chk_pool_rec *cpr, struct ds_pool_clue *clue) +{ + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + char strs[3][CHK_MSG_BUFLEN] = { 0 }; + char msg[CHK_MSG_BUFLEN] = { 0 }; + d_iov_t iovs[3]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + char *label = NULL; + struct chk_report_unit cru = { 0 }; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + uint64_t seq = 0; + uint32_t options[3]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; + + cla = CHK__CHECK_INCONSIST_CLASS__CIC_POOL_BAD_LABEL; + act = prop->cp_policies[cla]; + + switch (act) { + case CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT: + /* + * The pool label is mainly used by MS to lookup pool UUID by label. + * The PS recorded label info is just some kind of backup. So trust + * the label info on MS if exist. + */ + if (cpr->cpr_label == NULL) + goto try_ps; + + /* Fall through. */ + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS; + /* Delay pool label update on PS until CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP. */ + cpr->cpr_delay_label = 1; + goto out; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + +try_ps: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + cbk->cb_statistics.cs_total++; + seq = chk_report_seq_gen(ins); + + result = chk_dup_string(&label, clue->pc_label, + clue->pc_label != NULL ? 
strlen(clue->pc_label) : 0); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + label = clue->pc_label; + break; + } + + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, clue->pc_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + if (cpr->cpr_label != NULL) + label = cpr->cpr_label; + else + label = clue->pc_label; + + cbk->cb_statistics.cs_total++; + /* Report the inconsistency without repair. */ + cbk->cb_statistics.cs_ignored++; + break; + default: + /* + * If the specified action is not applicable to the inconsistency, + * then switch to interaction mode for the decision from admin. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT: + if (cpr->cpr_label != NULL) + label = cpr->cpr_label; + else + label = clue->pc_label; + + if (prop->cp_flags & CHK__CHECK_FLAG__CF_AUTO) { + /* Ignore the inconsistency if admin does not want interaction. */ + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_ignored++; + break; + } + + act = CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT; + + if (cpr->cpr_label == NULL) { + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS; + snprintf(strs[0], CHK_MSG_BUFLEN - 1, + "Trust PS pool label: %s [suggested].", + clue->pc_label != NULL ? clue->pc_label : "(null)"); + snprintf(strs[1], CHK_MSG_BUFLEN - 1, "Trust MS pool label: (null)."); + } else { + options[0] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS; + options[1] = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + snprintf(strs[0], CHK_MSG_BUFLEN - 1, + "Trust MS pool label: %s [suggested].", cpr->cpr_label); + snprintf(strs[1], CHK_MSG_BUFLEN - 1, "Trust PS pool label: %s.", + clue->pc_label != NULL ? 
clue->pc_label : "(null)"); + } + + options[2] = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + snprintf(strs[2], CHK_MSG_BUFLEN - 1, + "Keep the inconsistent pool label: %s (MS) vs %s (PS), repair nothing.", + cpr->cpr_label != NULL ? cpr->cpr_label : "(null)", + clue->pc_label != NULL ? clue->pc_label : "(null)"); + option_nr = 3; + + d_iov_set(&iovs[0], strs[0], strlen(strs[0])); + d_iov_set(&iovs[1], strs[1], strlen(strs[1])); + d_iov_set(&iovs[2], strs[2], strlen(strs[2])); + + sgl.sg_nr = 3; + sgl.sg_nr_out = 0; + sgl.sg_iovs = iovs; + + details = &sgl; + detail_nr = 1; + break; + } + +report: + cru.cru_gen = cbk->cb_gen; + cru.cru_cla = cla; + cru.cru_act = option_nr != 0 ? CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT : act; + cru.cru_rank = dss_self_rank(); + cru.cru_option_nr = option_nr; + cru.cru_detail_nr = detail_nr; + cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; + cru.cru_pool_label = label; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "Check leader detects corrupted pool label: %s (MS) vs %s (PS).\n", + cpr->cpr_label != NULL ? cpr->cpr_label : "(null)", + clue->pc_label != NULL ? clue->pc_label : "(null)"); + cru.cru_msg = msg; + cru.cru_options = options; + cru.cru_details = details; + cru.cru_result = result; + + rc = chk_leader_report(&cru, &seq, &decision); + + D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" detects corrupted label for pool "DF_UUIDF", action %u (%s), seq " + DF_X64", MS label %s, PS label %s, handle_rc %d, report_rc %d, decision %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), act, + option_nr ? "need interact" : "no interact", seq, + cpr->cpr_label != NULL ? cpr->cpr_label : "(null)", + clue->pc_label != NULL ? clue->pc_label : "(null)", result, rc, decision); + + if (rc < 0 && option_nr > 0) { + cbk->cb_statistics.cs_total++; + cbk->cb_statistics.cs_failed++; + /* It is unnecessary to skip the pool if failed to handle label inconsistency. 
*/ + result = rc; + } + + if (rc > 0 || result != 0 || option_nr == 0) { + if (label != NULL && label != clue->pc_label && label != cpr->cpr_label) { + D_FREE(cpr->cpr_label); + cpr->cpr_label = label; + } + + goto out; + } + + option_nr = 0; + detail_nr = 0; + + switch (decision) { + default: + D_ERROR(DF_LEADER" got invalid decision %d for corrupted pool label" + DF_UUIDF" with seq "DF_X64". Ignore the inconsistency.\n", + DP_LEADER(ins), decision, DP_UUID(cpr->cpr_uuid), seq); + /* + * Invalid option, ignore the inconsistency. + * + * Fall through. + */ + case CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE: + act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; + cbk->cb_statistics.cs_total++; + cbk->cb_statistics.cs_ignored++; + /* It is unnecessary to skip the pool if ignore label inconsistency. */ + break; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_MS; + /* Delay pool label update on PS until CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP. */ + cpr->cpr_delay_label = 1; + cpr->cpr_label_seq = seq; + goto out; + case CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS: + act = CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_PS; + cbk->cb_statistics.cs_total++; + + label = NULL; + result = chk_dup_string(&label, clue->pc_label, + clue->pc_label != NULL ? strlen(clue->pc_label) : 0); + if (result != 0) { + cbk->cb_statistics.cs_failed++; + label = clue->pc_label; + break; + } + + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) { + cbk->cb_statistics.cs_repaired++; + } else { + result = ds_chk_regpool_upcall(seq, cpr->cpr_uuid, clue->pc_label, + clue->pc_svc_clue->psc_db_clue.bcl_replicas); + if (result != 0) + cbk->cb_statistics.cs_failed++; + else + cbk->cb_statistics.cs_repaired++; + } + break; + } + + goto report; + +out: + /* + * If decide to delay pool label update on PS until CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP, + * then it is unnecessary to update the leader bookmark. 
+ */ + chk_leader_post_repair(ins, cpr, &result, + (rc > 0 || cpr->cpr_delay_label) ? false : true, false); + + return result; +} + +static void +chk_leader_dp_ult(void *arg) +{ + struct chk_pool_rec *cpr = arg; + struct chk_bookmark *cbk = &cpr->cpr_bk; + int rc; + + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + rc = chk_leader_dangling_pool(cpr); + if (rc != 0) + cpr->cpr_skip = 1; + else + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + + cpr->cpr_done = 1; + chk_pool_put(cpr); +} + +static void chk_leader_pool_ult(void *arg); + +static int +chk_leader_handle_pools_list(struct chk_instance *ins) +{ + struct chk_property *prop = &ins->ci_prop; + struct chk_list_pool *clp = NULL; + struct chk_pool_rec *cpr; + struct chk_pool_rec *tmp; + d_iov_t riov; + d_iov_t kiov; + int clp_nr; + int rc = 0; + int i; + bool exit; + + clp_nr = ds_chk_listpool_upcall(&clp); + if (clp_nr < 0) { + rc = clp_nr; + clp_nr = 0; + goto out; + } + + if (prop->cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) + exit = true; + else + exit = false; + + /* Firstly, handle dangling pool(s) based on the comparison between engines and MS. */ + for (i = 0; i < clp_nr; i++) { + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, clp[i].clp_uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc == 0) { + cpr = (struct chk_pool_rec *)riov.iov_buf; + cpr->cpr_exist_on_ms = 1; + + if (cpr->cpr_done) + continue; + + rc = chk_dup_string(&cpr->cpr_label, clp[i].clp_label, + clp[i].clp_label != NULL ? + strlen(clp[i].clp_label) : 0); + if (rc != 0) { + cpr->cpr_skip = 1; + goto out; + } + + /* No engine report shard for the pool, it is dangling pool. */ + if (d_list_empty(&cpr->cpr_shard_list)) { + chk_pool_get(cpr); + cpr->cpr_dangling = 1; + rc = dss_ult_create(chk_leader_dp_ult, cpr, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ, &cpr->cpr_thread); + if (rc != 0) { + D_ERROR("Failed to create ULT for pool "DF_UUIDF + " with %s (3): "DF_RC"\n", DP_UUID(cpr->cpr_uuid), + exit ? 
"failout" : "continue", DP_RC(rc)); + cpr->cpr_skip = 1; + chk_pool_put(cpr); + if (exit) + goto out; + rc = 0; + } + continue; + } + + /* + * For check orphan pool, since exist on both MS and engine, + * then it is normally, not orphan pool, remove the @cpr. + */ + if (cpr->cpr_for_orphan) { + chk_pool_remove_nowait(cpr); + continue; + } + + chk_pool_get(cpr); + /* + * Each pool will has a dedicated ULT to handle the subsequent check, + * then even if a pool check is blocked, it will not affect other pools. + */ + rc = dss_ult_create(chk_leader_pool_ult, cpr, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ, &cpr->cpr_thread); + if (rc != 0) { + D_ERROR("Failed to create ULT for pool "DF_UUIDF" with %s (1): " + DF_RC"\n", DP_UUID(cpr->cpr_uuid), + exit ? "failout" : "continue", DP_RC(rc)); + cpr->cpr_skip = 1; + chk_pool_put(cpr); + } + } else if (rc == -DER_NONEXIST) { + /* + * If the user specified the pool in the check list, then it must exist in + * the pools tree. So if it is not there, then unless the check is for all + * pools, otherwise, skip it. + */ + if (!(ins->ci_start_flags & CSF_ORPHAN_POOL)) { + rc = 0; + continue; + } + + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, + clp[i].clp_uuid, CHK_LEADER_RANK, + NULL /* bookmark */, ins, NULL /* shard_nr */, + NULL /* data */, NULL, &cpr); + if (rc != 0) { + D_ERROR("Failed to create record for dangling pool " + DF_UUIDF" with %s: "DF_RC"\n", DP_UUID(clp[i].clp_uuid), + exit ? "failout" : "continue", DP_RC(rc)); + if (!exit) + rc = 0; + } else { + cpr->cpr_exist_on_ms = 1; + rc = chk_dup_string(&cpr->cpr_label, clp[i].clp_label, + clp[i].clp_label != NULL ? 
+ strlen(clp[i].clp_label) : 0); + if (rc != 0) { + cpr->cpr_skip = 1; + goto out; + } + + chk_pool_get(cpr); + cpr->cpr_dangling = 1; + rc = dss_ult_create(chk_leader_dp_ult, cpr, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ, &cpr->cpr_thread); + if (rc != 0) { + D_ERROR("Failed to create ULT for pool "DF_UUIDF + " with %s (4): "DF_RC"\n", DP_UUID(cpr->cpr_uuid), + exit ? "failout" : "continue", DP_RC(rc)); + cpr->cpr_skip = 1; + chk_pool_put(cpr); + if (!exit) + rc = 0; + } + } + } else { + D_ERROR("Failed to verify pool "DF_UUIDF" existence with %s: "DF_RC"\n", + DP_UUID(clp[i].clp_uuid), exit ? "failout" : "continue", DP_RC(rc)); + if (!exit) + rc = 0; + } + + if (rc != 0) + goto out; + } + + d_list_for_each_entry_safe(cpr, tmp, &ins->ci_pool_list, cpr_link) { + if (cpr->cpr_skip || cpr->cpr_done || cpr->cpr_exist_on_ms) + continue; + + /* The cpr is only for check dangling, can be removed from the list now. */ + if (unlikely(cpr->cpr_bk.cb_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE)) { + cpr->cpr_done = 1; + continue; + } + + chk_pool_get(cpr); + /* + * Each pool will have a dedicated ULT to handle the subsequent check, + * then even if a pool check is blocked, it will not affect other pools. + */ + rc = dss_ult_create(chk_leader_pool_ult, cpr, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ, &cpr->cpr_thread); + if (rc != 0) { + D_ERROR("Failed to create ULT for pool "DF_UUIDF" with %s (2): "DF_RC"\n", + DP_UUID(cpr->cpr_uuid), exit ?
"failout" : "continue", DP_RC(rc)); + cpr->cpr_skip = 1; + chk_pool_put(cpr); + goto out; + } + } + +out: + ds_chk_free_pool_list(clp, clp_nr); + + if (rc != 0) + D_ERROR(DF_LEADER" failed to handle pools list: "DF_RC"\n", + DP_LEADER(ins), DP_RC(rc)); + + return rc; +} + +static int +chk_leader_pool_mbs_one(struct chk_pool_rec *cpr) +{ + struct rsvc_client client = { 0 }; + crt_endpoint_t ep = { 0 }; + struct rsvc_hint hint = { 0 }; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &ins->ci_bk; + d_rank_list_t *ps_ranks = NULL; + struct chk_pool_shard *cps; + struct ds_pool_clue *clue; + uint32_t interval; + int rc = 0; + int rc1; + int i = 0; + bool notify = true; + + D_ASSERT(cpr->cpr_mbs == NULL); + + D_ALLOC_ARRAY(cpr->cpr_mbs, cpr->cpr_shard_nr); + if (cpr->cpr_mbs == NULL) + D_GOTO(out_post, rc = -DER_NOMEM); + + d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) { + clue = cps->cps_data; + D_ASSERT(i < cpr->cpr_shard_nr); + + cpr->cpr_mbs[i].cpm_rank = cps->cps_rank; + cpr->cpr_mbs[i].cpm_tgt_nr = clue->pc_tgt_nr; + /* + * NOTE: Do not allocate space for cpm_tgt_status, instead, we can directly + * use clue->pc_tgt_status, that will be freed when free the pool rec + * via chk_pool_put()->cps_free_cb(). + */ + cpr->cpr_mbs[i].cpm_tgt_status = clue->pc_tgt_status; + i++; + } + + ps_ranks = chk_leader_cpr2ranklist(cpr, true); + if (ps_ranks == NULL) + D_GOTO(out_post, rc = -DER_NOMEM); + + /* + * The PS leader election needs some time, we do not need to retry chk_pool_mbs_remote() + * too frequently. Here, for each PS leader candidate, we will try once per second. Then + * if some one is elected as the PS leader, we will find it about one second later. 
+ */ + interval = 1000 / ps_ranks->rl_nr; + + rc = rsvc_client_init(&client, ps_ranks); + d_rank_list_free(ps_ranks); + + if (rc != 0) + goto out_post; + +again: + rc = rsvc_client_choose(&client, &ep); + if (rc != 0) + goto out_client; + + rc = chk_pool_mbs_remote(ep.ep_rank, CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS, cbk->cb_gen, + cpr->cpr_uuid, cpr->cpr_label, cpr->cpr_label_seq, + cpr->cpr_delay_label ? CMF_REPAIR_LABEL : 0, + cpr->cpr_shard_nr, cpr->cpr_mbs, &hint); + + rc1 = rsvc_client_complete_rpc(&client, &ep, rc, rc, &hint); + if (rc1 == RSVC_CLIENT_RECHOOSE || + (rc1 == RSVC_CLIENT_PROCEED && daos_rpc_retryable_rc(rc))) { + dss_sleep(interval); + if (cpr->cpr_stop || !ins->ci_sched_running || ins->ci_sched_exiting) { + notify = false; + D_GOTO(out_client, rc = 0); + } + goto again; + } + +out_client: + rsvc_client_fini(&client); + +out_post: + if (rc != 0) { + cpr->cpr_skip = 1; + if (rc == -DER_SHUTDOWN || rc == -DER_NONEXIST) + rc = 0; + + chk_leader_post_repair(ins, cpr, &rc, false, notify); + } + + return rc; +} + +static void +chk_leader_pool_ult(void *arg) +{ + struct chk_pool_rec *cpr = arg; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + d_rank_list_t *ranks; + struct ds_pool_clue *clue; + struct chk_iv iv = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + uint32_t flags = 0; + int rc = 0; + + D_INFO(DF_LEADER" pool ult enter for "DF_UUIDF"\n", DP_LEADER(ins), DP_UUID(cpr->cpr_uuid)); + + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + + /* + * NOTE: We need build clues even if we are resuming from former stop/pause phase. + * That is the base for subsequent pool start and pool MBS. But it does not + * mean we will re-execute all the checking from the scratch. Because if we + * have passed some phases in former instance, then PS quorum related issues + * have already been resolved at that time. 
Then for current check instance, + * related check be skipped or become noop. + */ + + rc = chk_leader_handle_pool_clues(cpr); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + + if (cbk->cb_phase > CHK__CHECK_SCAN_PHASE__CSP_PREPARE) + goto start; + + if (!cpr->cpr_exist_on_ms) { + rc = chk_leader_orphan_pool(cpr); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + } else { + clue = cpr->cpr_clue; + if ((clue->pc_label != NULL && cpr->cpr_label == NULL) || + (clue->pc_label == NULL && cpr->cpr_label != NULL) || + (clue->pc_label != NULL && cpr->cpr_label != NULL && + strcmp(clue->pc_label, cpr->cpr_label) != 0)) { + rc = chk_leader_handle_pool_label(cpr, clue); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + } + } + + if (cbk->cb_phase < CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST) { + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0) { + cpr->cpr_skip = 1; + goto out; + } + + if (DAOS_FAIL_CHECK(DAOS_CHK_LEADER_BLOCK)) { + while (!(chk_leader_pool_need_stop(cpr, &rc))) + dss_sleep(300); + goto exit; + } + } + +start: + ranks = chk_leader_cpr2ranklist(cpr, false); + if (ranks == NULL) { + cpr->cpr_skip = 1; + D_GOTO(out, rc = -DER_NOMEM); + } + + if (cpr->cpr_for_orphan) + flags |= CPSF_FOR_ORPHAN; + if (cpr->cpr_not_export_ps) + flags |= CPSF_NOT_EXPORT_PS; + + /* + * Notify all related pool shards to start the pool. Piggyback the + * phase CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST to the pool shards. 
+ */ + rc = chk_pool_start_remote(ranks, cbk->cb_gen, cpr->cpr_uuid, + CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST, flags); + d_rank_list_free(ranks); + if (rc != 0) + cpr->cpr_skip = 1; + + if (rc == -DER_SHUTDOWN || rc == -DER_NONEXIST) + goto exit; + + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + + rc = chk_leader_start_pool_svc(cpr); + if (chk_leader_pool_need_stop(cpr, &rc)) + goto out; + + if (cbk->cb_phase < CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS) { + cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS; + rc = chk_bk_update_pool(cbk, uuid_str); + if (rc != 0) { + cpr->cpr_skip = 1; + goto out; + } + } + + /* + * Notify the PS leader to drive the subsequent pool scan. Piggyback the + * phase CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS to related PS leader. + * The PS leader will handle subsequent pool scan phases. + */ + rc = chk_leader_pool_mbs_one(cpr); + +out: + /* For stop case, the pool status will be updated via chk_pool_stop_one() by the sponsor. */ + if ((rc < 0 || cpr->cpr_skip) && !cpr->cpr_notified_exit && !cpr->cpr_stop) { + iv.ci_gen = cbk->cb_gen; + uuid_copy(iv.ci_uuid, cpr->cpr_uuid); + iv.ci_phase = cbk->cb_phase; + iv.ci_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; + + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, + CRT_IV_SYNC_EAGER, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" notify engines to exit check for pool "DF_UUIDF" failure: %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), rc); + if (rc == 0) + cpr->cpr_notified_exit = 1; + } + +exit: + D_INFO(DF_LEADER" pool ult exit for "DF_UUIDF": rc = %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), rc); + + if (cpr->cpr_skip) + cpr->cpr_done = 1; + chk_pool_put(cpr); +} + +static void +chk_leader_mark_rank_dead(struct chk_instance *ins, struct chk_dead_rank *cdr) +{ + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + uint32_t version = cbk->cb_gen - prop->cp_rank_nr - 1; + int rc = 0; + + if 
(!chk_remove_rank_from_list(ins->ci_ranks, cdr->cdr_rank)) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + prop->cp_rank_nr--; + rc = chk_prop_update(prop, ins->ci_ranks); + if (rc != 0) + goto out; + + rc = crt_group_secondary_modify(ins->ci_iv_group, ins->ci_ranks, ins->ci_ranks, + CRT_GROUP_MOD_OP_REPLACE, version); + if (rc != 0) + goto out; + + rc = chk_rank_del(ins, cdr->cdr_rank); + if (rc != 0) + goto out; + + /* + * NOTE: Some thought about removing related shards from the ins->ci_pool_list, + * that may reduce the possibility of CR failure caused by the dead rank. + * But consider the rank death event is totally random, we cannot make it + * to be transparent to user. For example, the dead rank maybe the unique + * replica of some pool service, that will cause related PS failure after + * its death. + * + * On the other hand, if we modify the pool shards list for related pools, + * then it may hide data corruption silently. It may be different from the + * user expectation. + * + * So here, we do not try to hide the rank death event. If subsequent CR + * processing failed because of the dead rank, just report it. 
+ * + */ + + if (!d_list_empty(&ins->ci_rank_list)) + rc = chk_mark_remote(ins->ci_ranks, cbk->cb_gen, cdr->cdr_rank, version); + +out: + if (rc != -DER_NOTAPPLICABLE) + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" mark rank %u as dead with version %u: "DF_RC"\n", + DP_LEADER(ins), cdr->cdr_rank, version, DP_RC(rc)); + D_FREE(cdr); +} + +static void +chk_leader_sched(void *args) +{ + struct chk_instance *ins = args; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_dead_rank *cdr; + struct chk_iv iv = { 0 }; + uint32_t ins_phase; + uint32_t ins_status; + uint32_t pool_status; + int done = 0; + int rc = 0; + bool bcast = false; + bool more_dead; + + D_INFO(DF_LEADER" scheduler enter at phase %u\n", DP_LEADER(ins), cbk->cb_phase); + + ABT_mutex_lock(ins->ci_abt_mutex); + +again: + if (ins->ci_sched_exiting) { + ABT_mutex_unlock(ins->ci_abt_mutex); + D_GOTO(out, rc = 0); + } + + if (ins->ci_started) { + ABT_mutex_unlock(ins->ci_abt_mutex); + goto handle; + } + + ABT_cond_wait(ins->ci_abt_cond, ins->ci_abt_mutex); + + goto again; + +handle: + if (!d_list_empty(&ins->ci_pool_list) || ins->ci_start_flags & CSF_ORPHAN_POOL) { + rc = chk_leader_handle_pools_list(ins); + if (rc != 0) + D_GOTO(out, bcast = true); + } + + while (1) { + dss_sleep(300); + +check_dead: + ABT_mutex_lock(ins->ci_abt_mutex); + if (!d_list_empty(&ins->ci_dead_ranks)) { + cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link); + if (!d_list_empty(&ins->ci_dead_ranks)) + more_dead = true; + else + more_dead = false; + } else { + cdr = NULL; + more_dead = false; + } + ABT_mutex_unlock(ins->ci_abt_mutex); + + if (cdr != NULL) + chk_leader_mark_rank_dead(ins, cdr); + + if (chk_leader_need_stop(ins, &rc)) + D_GOTO(out, bcast = (rc > 0 ? 
true : false)); + + if (more_dead) + goto check_dead; + + /* + * TBD: The leader may need to detect engines' status/phase actively, otherwise + * if some engine failed to notify the leader for its status/phase changes, + * then the leader will be blocked there. + */ + + ins_phase = chk_pools_find_slowest(ins, &done); + + if (ins_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS && !ins->ci_orphan_done && + !DAOS_FAIL_CHECK(DAOS_CHK_SYNC_ORPHAN_PROCESS)) { + iv.ci_gen = cbk->cb_gen; + iv.ci_phase = ins_phase; + iv.ci_ins_status = CHK__CHECK_INST_STATUS__CIS_RUNNING; + + /* Synchronously notify engines that orphan pools have been processed. */ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, + CRT_IV_SYNC_EAGER, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" notify engines that orphan pools have been process: %d\n", + DP_LEADER(ins), rc); + if (rc == 0) + ins->ci_orphan_done = 1; + } + + /* + * Check @done before updating cb_phase. Otherwise, the cb_phase may have become 'DONE' + * but cb_ins_status is still 'RUNNING'. + */ + if (done != 0) { + if (done > 0) { + D_INFO(DF_LEADER" has done\n", DP_LEADER(ins)); + rc = 1; + } else { + D_INFO(DF_LEADER" is stopped\n", DP_LEADER(ins)); + rc = 0; + } + + D_GOTO(out, rc); + } + + if (cbk->cb_phase == CHK_INVAL_PHASE || cbk->cb_phase < ins_phase) { + D_INFO(DF_LEADER" moves from phase %u to phase %u\n", + DP_LEADER(ins), cbk->cb_phase, ins_phase); + + cbk->cb_phase = ins_phase; + /* QUEST: How to estimate the left time? */ + cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - cbk->cb_phase; + rc = chk_bk_update_leader(cbk); + if (rc != 0) + D_GOTO(out, bcast = true); + } + } + +out: + ins_phase = CHK_INVAL_PHASE; + if (rc > 0) { + /* + * If some engine(s) failed during the start, then mark the instance as 'failed'. + * It means that there is at least one failure somewhere during the DAOS check. + * pool_status is useless in this case since all pools are done.
+ */ + if (ins->ci_slowest_fail_phase != CHK_INVAL_PHASE && + ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) { + ins_phase = ins->ci_slowest_fail_phase; + ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } else { + ins_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + ins_status = CHK__CHECK_INST_STATUS__CIS_COMPLETED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + } + } else if (rc == 0) { + if (ins->ci_implicated) { + ins_status = CHK__CHECK_INST_STATUS__CIS_IMPLICATED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } else if (ins->ci_stopping) { + ins_status = CHK__CHECK_INST_STATUS__CIS_STOPPED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_STOPPED; + } else { + ins_status = CHK__CHECK_INST_STATUS__CIS_PAUSED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_PAUSED; + } + } else { + ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + pool_status = CHK__CHECK_POOL_STATUS__CPS_IMPLICATED; + } + + chk_leader_exit(ins, ins_phase, ins_status, pool_status, bcast); + + D_INFO(DF_LEADER" scheduler exit at phase %u with status %u: rc %d\n", + DP_LEADER(ins), cbk->cb_phase, ins_status, rc); + + ins->ci_sched_running = 0; +} + +static int +chk_leader_pools2list(struct chk_instance *ins, int *pool_nr, uuid_t **p_pools) +{ + struct chk_pool_rec *cpr; + uuid_t *pools; + uuid_t *tmp; + int cap = 4; + int idx = 0; + int rc = 0; + + D_ALLOC_ARRAY(pools, cap); + if (pools == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + if (cpr->cpr_skip || cpr->cpr_done || cpr->cpr_for_orphan) + continue; + + if (idx >= cap) { + D_REALLOC_ARRAY(tmp, pools, cap, cap << 1); + if (tmp == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + pools = tmp; + cap <<= 1; + } + + uuid_copy(pools[idx++], cpr->cpr_uuid); + } + +out: + if (rc == 0) { + *p_pools = pools; + *pool_nr = idx; + } else { + D_FREE(pools); + } + + return rc; +} + +int +chk_leader_ranks_prepare(struct chk_instance *ins, 
uint32_t rank_nr, d_rank_t *ranks, + d_rank_list_t **p_ranks) +{ + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_property *prop = &ins->ci_prop; + d_rank_list_t *rank_list = NULL; + int rc = 0; + + rank_list = uint32_array_to_rank_list(ranks, rank_nr); + if (rank_list == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + d_rank_list_sort(rank_list); + + /* Corrupted bookmark or new created one. Nothing can be reused. */ + if ((ins->ci_is_leader && cbk->cb_magic != CHK_BK_MAGIC_LEADER) || + (!ins->ci_is_leader && cbk->cb_magic != CHK_BK_MAGIC_ENGINE)) { + memset(prop, 0, sizeof(*prop)); + D_GOTO(out, rc = 1); + } + + /* Reload former ranks if necessary. */ + if (ins->ci_ranks == NULL) { + rc = chk_prop_fetch(prop, &ins->ci_ranks); + if (rc != 0 && rc != -DER_NONEXIST) + goto out; + } + + /* New system or add new rank(s), need global reset. */ + if (ins->ci_ranks == NULL) + D_GOTO(out, rc = 1); + + /* Change rank list must be handled as 'reset' globally. */ + if (rank_nr != ins->ci_ranks->rl_nr || + memcmp(ins->ci_ranks->rl_ranks, rank_list->rl_ranks, sizeof(d_rank_t) * rank_nr) != 0) { + D_WARN("Use new rank list, reset the check globally\n"); + D_GOTO(out, rc = 1); + } + +out: + if (rc > 0) + *p_ranks = rank_list; + else + d_rank_list_free(rank_list); + + return rc; +} + +static int +chk_leader_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, + uuid_t pools[], int phase, d_rank_t leader, uint32_t flags) +{ + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_traverse_pools_args ctpa = { 0 }; + struct chk_rank_bundle rbund = { 0 }; + d_iov_t riov; + d_iov_t kiov; + d_rank_list_t *rank_list = NULL; + uint64_t gen; + uint32_t cbk_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + int rc = 0; + int i; + + if (rank_nr == 0) { + D_ERROR("Rank list cannot be NULL for check start\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (flags & 
CHK__CHECK_FLAG__CF_FAILOUT && flags & CHK__CHECK_FLAG__CF_NO_FAILOUT) { + D_ERROR("failout and non_failout cannot be specified together\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (flags & CHK__CHECK_FLAG__CF_AUTO && flags & CHK__CHECK_FLAG__CF_NO_AUTO) { + D_ERROR("auto and non_auto cannot be specified together\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + /* New generation for each instance. */ + gen = d_hlc_get(); + + rc = chk_leader_ranks_prepare(ins, rank_nr, ranks, &rank_list); + if (rc > 0) + goto reset; + if (rc < 0) + goto out; + + if (prop->cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) + ins->ci_start_flags |= CSF_RESET_NONCOMP; + + /* + * If "CHK__CHECK_FLAG__CF_RESET" is specified, then restart the check from + * scratch for the given pools (pool_nr > 0) or for all pools (pool_nr == 0). + */ + + if (pool_nr > 0) { + rc = chk_pools_load_list(ins, gen, flags, pool_nr, pools, &cbk_phase); + if (rc != 0) + goto out; + } else { + if (flags & CHK__CHECK_FLAG__CF_RESET) + goto reset; + + ctpa.ctpa_ins = ins; + ctpa.ctpa_gen = gen; + ctpa.ctpa_phase = cbk_phase; + rc = chk_traverse_pools(chk_pools_load_from_db, &ctpa); + if (rc != 0) + goto out; + + cbk_phase = ctpa.ctpa_phase; + } + + if (d_list_empty(&ins->ci_pool_list) && !(flags & CHK__CHECK_FLAG__CF_ORPHAN_POOL)) + D_GOTO(out, rc = 1); + + goto init; + +reset: + ins->ci_start_flags = CSF_RESET_ALL; + if (pool_nr <= 0) + ins->ci_start_flags |= CSF_ORPHAN_POOL; + + rc = chk_traverse_pools(chk_pools_cleanup_cb, NULL); + if (rc != 0) + goto out; + + memset(cbk, 0, sizeof(*cbk)); + cbk->cb_magic = CHK_BK_MAGIC_LEADER; + cbk->cb_version = DAOS_CHK_VERSION; + +init: + rc = chk_prop_prepare(leader, flags, phase, policy_nr, policies, rank_list, prop); + if (rc != 0) + goto out; + + if (rank_list != NULL) { + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = rank_list; + rank_list = NULL; + } + + cbk->cb_gen = gen; + if (flags & CHK__CHECK_FLAG__CF_RESET && !(ins->ci_start_flags & CSF_RESET_ALL)) { +
memset(&cbk->cb_statistics, 0, sizeof(cbk->cb_statistics)); + memset(&cbk->cb_time, 0, sizeof(cbk->cb_time)); + } + + ins->ci_slowest_fail_phase = CHK_INVAL_PHASE; + if (flags & CHK__CHECK_FLAG__CF_ORPHAN_POOL) + ins->ci_start_flags |= CSF_ORPHAN_POOL; + + /* The leader bookmark will be stored via chk_leader_start_post() later. */ + if (cbk->cb_phase > cbk_phase) + cbk->cb_phase = cbk_phase; + + /* Prepare ranks tree. */ + for (i = 0; i < ins->ci_ranks->rl_nr; i++) { + rbund.crb_rank = ins->ci_ranks->rl_ranks[i]; + /* + * The phase for the rank may be not accurate, that is not important as long as it + * is not 'DONE'. If it is DONE, it will be refreshed via chk_leader_start_post(). + */ + rbund.crb_phase = cbk->cb_phase; + rbund.crb_ins = ins; + + d_iov_set(&riov, &rbund, sizeof(rbund)); + d_iov_set(&kiov, &ins->ci_ranks->rl_ranks[i], sizeof(d_rank_t)); + rc = dbtree_upsert(ins->ci_rank_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, + &kiov, &riov, NULL); + if (rc != 0) + break; + } + +out: + d_rank_list_free(rank_list); + if (rc < 0) { + /* Reset ci_ranks if hit failure, then we can reload when use it next time. 
*/ + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = NULL; + } + + return rc; +} + +static int +chk_leader_start_post(struct chk_instance *ins) +{ + struct chk_pool_rec *cpr; + struct chk_pool_rec *tmp; + struct chk_pool_shard *cps; + struct ds_pool_clue *clue; + struct chk_rank_rec *crr; + struct chk_iv iv = { 0 }; + struct chk_bookmark *ins_cbk = &ins->ci_bk; + struct chk_bookmark *pool_cbk; + char uuid_str[DAOS_UUID_STR_SIZE]; + uint32_t ins_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + uint32_t pool_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + int rc = 0; + + d_list_for_each_entry_safe(cpr, tmp, &ins->ci_pool_list, cpr_link) { + pool_cbk = &cpr->cpr_bk; + + if (pool_cbk->cb_magic != CHK_BK_MAGIC_POOL) { + memset(pool_cbk, 0, sizeof(*pool_cbk)); + pool_cbk->cb_magic = CHK_BK_MAGIC_POOL; + pool_cbk->cb_gen = ins_cbk->cb_gen; + pool_cbk->cb_version = DAOS_CHK_VERSION; + } + + /* + * No engine report shard for the pool, it is dangling pool, + * keep it for subsequent dangling pool logic to handle it. + */ + if (d_list_empty(&cpr->cpr_shard_list)) + continue; + + d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) { + clue = cps->cps_data; + /* + * The pool is only used to handle orphan pool, mark as 'for_orphan', + * then only orphan pool logic will handle it, others will skip it. + */ + if (clue->pc_phase == CHK_INVAL_PHASE) { + cpr->cpr_for_orphan = 1; + goto next; + } + + if (pool_phase > clue->pc_phase) + pool_phase = clue->pc_phase; + } + + if (pool_cbk->cb_phase <= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS || + pool_phase <= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS) + pool_phase = pool_cbk->cb_phase; + else + pool_cbk->cb_phase = pool_phase; + + if (likely(pool_phase != CHK__CHECK_SCAN_PHASE__CSP_DONE)) { + pool_cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + /* Always refresh the start time. */ + pool_cbk->cb_time.ct_start_time = time(NULL); + /* QUEST: How to estimate the left time? 
*/ + pool_cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - + pool_cbk->cb_phase; + } else { + pool_cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + pool_cbk->cb_time.ct_stop_time = time(NULL); + cpr->cpr_done = 1; + } + + uuid_unparse_lower(cpr->cpr_uuid, uuid_str); + rc = chk_bk_update_pool(pool_cbk, uuid_str); + if (rc != 0) + break; + + /* + * NOTE: The pool has been checked, notify the engines to drop their cpr. + * But keep the leader's cpr for handling dangling pool. + */ + if (unlikely(pool_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE)) { + iv.ci_gen = ins_cbk->cb_gen; + uuid_copy(iv.ci_uuid, cpr->cpr_uuid); + iv.ci_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + iv.ci_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; + + /* + * Synchronously notify the engines that check on the pool is done. + * The check leader is the last one to know that. So even if failed + * to notify the engine for the check done, that is not fatal. That + * can be redone in the next check instance. + * + * NOTE(review): rc is not reset after this call, so if no later pool + * overwrites it, a failure here is still returned by this function + * and the leader bookmark update below is skipped -- confirm intent. + */ + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, + CRT_IV_SYNC_EAGER, true); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" notify engines the pool "DF_UUIDF" is checked: %d\n", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), rc); + } else if (ins_phase > pool_phase) { + ins_phase = pool_phase; + } + +next: + pool_phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; + } + + if (rc == 0) { + /* + * The phase in leader bookmark may be larger than the phase in + * some pools that may be newly added into current check instance. + * So we allow the phase to go backward.
+ */ + if (ins_cbk->cb_phase != ins_phase) { + ins_cbk->cb_phase = ins_phase; + d_list_for_each_entry(crr, &ins->ci_rank_list, crr_link) + crr->crr_phase = ins_phase; + } + + if (likely(ins_phase != CHK__CHECK_SCAN_PHASE__CSP_DONE) || + ins->ci_start_flags & CSF_ORPHAN_POOL) { + if (ins_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE) + ins->ci_for_orphan = 1; + ins_cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_RUNNING; + /* Always refresh the start time. */ + ins_cbk->cb_time.ct_start_time = time(NULL); + /* QUEST: How to estimate the left time? */ + ins_cbk->cb_time.ct_left_time = CHK__CHECK_SCAN_PHASE__CSP_DONE - + ins_cbk->cb_phase; + } else { + ins_cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_COMPLETED; + ins_cbk->cb_time.ct_stop_time = time(NULL); + } + + rc = chk_bk_update_leader(ins_cbk); + /* + * NOTE: If the check list is not empty or the user specifies 'orphan' option, + * then still need to further check for dangling/orphan pools. Otherwise + * we can return 1 to directly exit the check. + */ + if (rc == 0 && ins_phase == CHK__CHECK_SCAN_PHASE__CSP_DONE) { + if (ins->ci_for_orphan) + /* + * Mark instance bookmark as 'CSP_PREPARE' in DRAM to avoid a + * confusing 'CSP_DONE' phase with check RUNNING status in check query.
+ */ + ins_cbk->cb_phase = CHK__CHECK_SCAN_PHASE__CSP_PREPARE; + else if (d_list_empty(&ins->ci_pool_list)) + rc = 1; + } + } + + return rc; +} + +static int +chk_leader_dup_clue(struct ds_pool_clue **tgt, struct ds_pool_clue *src) +{ + struct ds_pool_clue *clue = NULL; + struct ds_pool_svc_clue *svc = NULL; + uint32_t *status = NULL; + char *label = NULL; + int rc = 0; + + D_ALLOC_PTR(clue); + if (clue == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + if (src->pc_svc_clue != NULL) { + D_ALLOC_PTR(svc); + if (svc == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + memcpy(svc, src->pc_svc_clue, sizeof(*svc)); + if (src->pc_svc_clue->psc_db_clue.bcl_replicas != NULL) { + rc = d_rank_list_dup(&svc->psc_db_clue.bcl_replicas, + src->pc_svc_clue->psc_db_clue.bcl_replicas); + if (rc != 0) { + svc->psc_db_clue.bcl_replicas = NULL; + goto out; + } + } + } + + rc = chk_dup_string(&label, src->pc_label, src->pc_label_len); + if (rc != 0) + goto out; + + if (src->pc_tgt_status != NULL) { + D_ALLOC_ARRAY(status, src->pc_tgt_nr); + if (status == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + memcpy(status, src->pc_tgt_status, sizeof(*status) * src->pc_tgt_nr); + } + + memcpy(clue, src, sizeof(*clue)); + clue->pc_svc_clue = svc; + clue->pc_label = label; + clue->pc_tgt_status = status; + +out: + if (rc != 0) { + if (svc != NULL) { + d_rank_list_free(svc->psc_db_clue.bcl_replicas); + D_FREE(svc); + } + + D_FREE(status); + D_FREE(label); + D_FREE(clue); + } else { + *tgt = clue; + } + + return rc; +} + +static void +chk_leader_free_clue(void *data) +{ + struct ds_pool_clue *clue = data; + + if (clue != NULL) { + ds_pool_clue_fini(clue); + D_FREE(clue); + } +} + +static int +chk_leader_start_cb(struct chk_co_rpc_cb_args *cb_args) +{ + struct chk_instance *ins = cb_args->cb_priv; + struct ds_pool_clue *clues = cb_args->cb_data; + struct ds_pool_clue *clue; + int rc = 0; + int i; + + D_ASSERTF(cb_args->cb_result >= 0, "Unexpected result for start CB %d\n", + cb_args->cb_result); + + /* The engine has 
completed the check, remove it from the rank list. */ + if (cb_args->cb_result > 0) { + rc = chk_rank_del(ins, cb_args->cb_rank); + goto out; + } + + for (i = 0; i < cb_args->cb_nr; i++) { + /* + * @clues is from chk_start_remote RPC reply, the buffer will be released after + * the RPC is done. Copy all related data into a new buffer for further use. + */ + rc = chk_leader_dup_clue(&clue, &clues[i]); + if (rc != 0) + goto out; + + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, clue->pc_uuid, + clue->pc_rank, NULL, ins, NULL, clue, + chk_leader_free_clue, NULL); + if (rc != 0) { + chk_leader_free_clue(clue); + goto out; + } + } + +out: + if (rc != 0) + D_ERROR(DF_LEADER" failed to handle start CB with rank %u, result %d: " + DF_RC"\n", DP_LEADER(ins), cb_args->cb_rank, cb_args->cb_result, DP_RC(rc)); + + return rc; +} + +int +chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, + int pool_nr, uuid_t pools[], uint32_t api_flags, int phase) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + uuid_t *c_pools = NULL; + struct umem_attr uma = { 0 }; + uuid_t dummy_pool = { 0 }; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_rank_t myrank = dss_self_rank(); + uint32_t flags = api_flags; + int c_pool_nr = 0; + int rc; + int rc1; + + rc = chk_ins_can_start(ins); + if (rc != 0) + goto out_log; + + ins->ci_starting = 1; + ins->ci_started = 0; + ins->ci_start_flags = 0; + ins->ci_for_orphan = 0; + ins->ci_orphan_done = 0; + ins->ci_implicated = 0; + ins->ci_pool_stopped = 0; + + D_ASSERT(daos_handle_is_inval(ins->ci_rank_hdl)); + D_ASSERT(d_list_empty(&ins->ci_rank_list)); + + D_ASSERT(daos_handle_is_inval(ins->ci_pool_hdl)); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); + + D_ASSERT(daos_handle_is_inval(ins->ci_pending_hdl)); + + if (ins->ci_sched != ABT_THREAD_NULL) + ABT_thread_free(&ins->ci_sched); + + chk_iv_ns_cleanup(&ins->ci_iv_ns); + + if (ins->ci_iv_group != NULL) { +
crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } + + uma.uma_id = UMEM_CLASS_VMEM; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_RANK, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_rank_btr, &ins->ci_rank_hdl); + if (rc != 0) + goto out_tree; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_POOL, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pool_btr, &ins->ci_pool_hdl); + if (rc != 0) + goto out_tree; + + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_PA, 0, CHK_BTREE_ORDER, &uma, + &ins->ci_pending_btr, &ins->ci_pending_hdl); + if (rc != 0) + goto out_tree; + +reset: + rc = chk_leader_start_prep(ins, rank_nr, ranks, policy_nr, policies, pool_nr, pools, + phase, myrank, flags); + if (rc == 1 && !(flags & CHK__CHECK_FLAG__CF_RESET)) { + /* Former check instance has done, let's re-start from the beginning. */ + flags |= CHK__CHECK_FLAG__CF_RESET; + goto reset; + } + + if (rc != 0) + goto out_tree; + + if (ins->ci_iv_group != NULL) + goto remote; + + uuid_generate(dummy_pool); + uuid_unparse_lower(dummy_pool, uuid_str); + rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); + if (rc != 0) + goto out_tree; + + rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, dummy_pool, ins->ci_iv_group, + &ins->ci_iv_id, &ins->ci_iv_ns); + if (rc != 0) + goto out_group; + + ds_iv_ns_update(ins->ci_iv_ns, myrank, ins->ci_iv_ns->iv_master_term + 1); + + if (d_list_empty(&ins->ci_pool_list)) { + c_pool_nr = pool_nr; + c_pools = pools; + } else { + rc = chk_leader_pools2list(ins, &c_pool_nr, &c_pools); + if (rc != 0) + goto out_iv; + } + +remote: + rc = chk_start_remote(ins->ci_ranks, cbk->cb_gen, rank_nr, ranks, policy_nr, policies, + c_pool_nr, c_pools, flags, phase, myrank, ins->ci_start_flags, + dummy_pool, chk_leader_start_cb, ins); + if (rc != 0) { + if (rc == -DER_OOG || rc == -DER_GRPVER || rc == -DER_AGAIN) { + D_INFO(DF_LEADER" Someone is not ready %d, let's retry start after 1 sec\n", + DP_LEADER(ins), rc); + if 
(!d_list_empty(&ins->ci_pool_list)) + chk_pool_shard_cleanup(ins); + dss_sleep(1000); + goto remote; + } + + goto out_iv; + } + + uuid_copy(cbk->cb_iv_uuid, dummy_pool); + rc = chk_leader_start_post(ins); + if (rc == 1 && !(flags & CHK__CHECK_FLAG__CF_RESET)) { + rc = chk_stop_remote(ins->ci_ranks, cbk->cb_gen, c_pool_nr, c_pools, NULL, NULL); + if (rc < 0) { + D_WARN(DF_LEADER" failed to rollback former start before reset: "DF_RC"\n", + DP_LEADER(ins), DP_RC(rc)); + goto out_iv; + } + + /* Former check instance has done, let's re-start from the beginning. */ + flags |= CHK__CHECK_FLAG__CF_RESET; + goto reset; + } + + if (rc != 0) + goto out_stop_remote; + + ins->ci_sched_running = 1; + + rc = dss_ult_create(chk_leader_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, + &ins->ci_sched); + if (rc != 0) { + ins->ci_sched_running = 0; + goto out_stop_pools; + } + + D_INFO("Leader %s check with api_flags %x, phase %d, leader %u, flags %x, gen " DF_X64 + " iv "DF_UUIDF": rc %d\n", + chk_is_ins_reset(ins, flags) ? "start" : "resume", api_flags, phase, myrank, + ins->ci_start_flags, cbk->cb_gen, DP_UUID(dummy_pool), rc); + + chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks); + chk_pools_dump(&ins->ci_pool_list, c_pool_nr > 0 ? c_pool_nr : pool_nr, + c_pool_nr > 0 ? 
c_pools : pools); + + ABT_mutex_lock(ins->ci_abt_mutex); + ins->ci_started = 1; + ABT_cond_broadcast(ins->ci_abt_cond); + ABT_mutex_unlock(ins->ci_abt_mutex); + + ins->ci_starting = 0; + + goto out_exit; + +out_stop_pools: + chk_pool_stop_all(ins, CHK__CHECK_POOL_STATUS__CPS_IMPLICATED, NULL); + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + cbk->cb_time.ct_stop_time = time(NULL); + cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_FAILED; + rc1 = chk_bk_update_leader(cbk); + if (rc1 != 0) + D_WARN(DF_LEADER" failed to update leader bookmark: "DF_RC"\n", + DP_LEADER(ins), DP_RC(rc1)); + } +out_stop_remote: + rc1 = chk_stop_remote(ins->ci_ranks, cbk->cb_gen, c_pool_nr, c_pools, NULL, NULL); + if (rc1 < 0) + D_WARN(DF_LEADER" failed to rollback failed check start: "DF_RC"\n", + DP_LEADER(ins), DP_RC(rc1)); +out_iv: + chk_iv_ns_cleanup(&ins->ci_iv_ns); +out_group: + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; +out_tree: + chk_leader_destroy_trees(ins); + ins->ci_starting = 0; +out_log: + D_CDEBUG(likely(rc < 0), DLOG_ERR, DLOG_INFO, + "Leader %s to start check on %u ranks for %d pools with " + "api_flags %x, phase %d, leader %u, gen "DF_X64": rc = %d\n", + rc < 0 ? "failed" : "try", rank_nr, pool_nr, api_flags, phase, + myrank, cbk->cb_gen, rc); + + if (unlikely(rc > 0)) + rc = 0; +out_exit: + /* Notify the control plane that the check (re-)starts from the scratch. */ + if (rc == 0 && chk_is_ins_reset(ins, flags)) + rc = 1; + + if (c_pools != NULL && c_pools != pools) + D_FREE(c_pools); + + return rc; +} + +static int +chk_leader_stop_cb(struct chk_co_rpc_cb_args *cb_args) +{ + struct chk_instance *ins = cb_args->cb_priv; + int rc; + + D_ASSERTF(cb_args->cb_result > 0, "Unexpected result for stop CB %d\n", cb_args->cb_result); + + if (cb_args->cb_flags & CSF_POOL_STOPPED) + ins->ci_pool_stopped = 1; + + /* The engine has stop on the rank, remove it from the rank list. 
*/ + rc = chk_rank_del(ins, cb_args->cb_rank); + if (rc != 0) + D_ERROR(DF_LEADER" failed to handle stop CB with rank %u: "DF_RC"\n", + DP_LEADER(ins), cb_args->cb_rank, DP_RC(rc)); + + return rc; +} + +int +chk_leader_stop(int pool_nr, uuid_t pools[]) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + int rc = 0; + int i; + + if (ins->ci_starting) + D_GOTO(log, rc = -DER_BUSY); + + if (ins->ci_stopping || ins->ci_sched_exiting) + D_GOTO(log, rc = -DER_INPROGRESS); + + /* + * NOTE: It is possible that the check leader is dead. If we want to stop the stale + * check instance on other engine, then we may execute the CHK_STOP on new + * check leader. But if the old leader is still active, and if the CHK_STOP + * dRPC is sent to non-leader (or new leader), then it will cause trouble. + * + * Here, it is not easy to know whether the old leader is still valid or not. + * We have to trust control plane. It is the control plane duty to guarantee + * that the CHK_STOP dRPC is sent to the right one. + */ + + ins->ci_stopping = 1; + + /* + * The check instance on current engine may have failed or stopped, but we do not know + * whether there is active check instance on other engines or not, send stop RPC anyway. + */ + + if (ins->ci_ranks == NULL) { + rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); + /* + * We do not know the rank list, the sponsor needs to choose another leader. + * It may be that the DAOS check has never run on this engine. + */ + if (rc == -DER_NONEXIST) + D_GOTO(out, rc = -DER_NOTLEADER); + + if (rc != 0) + goto out; + + if (unlikely(ins->ci_ranks == NULL)) + D_GOTO(out, rc = -DER_NOTLEADER); + } + + /* Use 0 as @gen parameter to all current or former instance by force. 
*/ + rc = chk_stop_remote(ins->ci_ranks, 0, pool_nr, pools, chk_leader_stop_cb, ins); + if (rc != 0) + goto out; + + if (pool_nr == 0) { + chk_pool_stop_all(ins, CHK__CHECK_POOL_STATUS__CPS_STOPPED, &rc); + if (rc != 0) + D_GOTO(out, rc); + } else { + for (i = 0; i < pool_nr; i++) { + chk_pool_stop_one(ins, pools[i], CHK__CHECK_POOL_STATUS__CPS_STOPPED, + CHK_INVAL_PHASE, &rc); + if (rc != 0) + D_GOTO(out, rc); + } + } + + if (d_list_empty(&ins->ci_rank_list)) + chk_stop_sched(ins); + +out: + ins->ci_pool_stopped = 0; + ins->ci_stopping = 0; +log: + if (rc >= 0) { + D_INFO("Leader stopped check with gen "DF_X64" for %d pools: rc %d\n", + cbk->cb_gen, pool_nr, rc); + + chk_pools_dump(NULL, pool_nr, pools); + } else { + D_ERROR("Leader failed to stop check with gen "DF_X64" for %d pools: "DF_RC"\n", + cbk->cb_gen, pool_nr, DP_RC(rc)); + } + + return rc; +} + +static int +chk_leader_dup_shard(struct chk_query_pool_shard **tgt, struct chk_query_pool_shard *src) +{ + struct chk_query_pool_shard *shard = NULL; + struct chk_query_target *target = NULL; + int rc = 0; + + D_ALLOC_PTR(shard); + if (shard == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + if (src->cqps_targets != NULL) { + D_ALLOC_ARRAY(target, src->cqps_target_nr); + if (target == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + memcpy(target, src->cqps_targets, sizeof(*target) * src->cqps_target_nr); + } + + memcpy(shard, src, sizeof(*shard)); + shard->cqps_targets = target; + +out: + if (rc != 0) + D_FREE(shard); + else + *tgt = shard; + + return rc; +} + +static void +chk_leader_free_shard(void *data) +{ + struct chk_query_pool_shard *shard = data; + + D_FREE(shard->cqps_targets); + D_FREE(shard); +} + +static int +chk_leader_query_cb(struct chk_co_rpc_cb_args *cb_args) +{ + struct chk_query_args *cqa = cb_args->cb_priv; + struct chk_query_pool_shard *shards = cb_args->cb_data; + struct chk_query_pool_shard *shard; + int rc = 0; + int i; + + if (cb_args->cb_result != 0) + goto out; + + cqa->cqa_ins_status = 
cb_args->cb_ins_status; + cqa->cqa_ins_phase = cb_args->cb_ins_phase; + cqa->cqa_gen = cb_args->cb_gen; + + for (i = 0; i < cb_args->cb_nr; i++) { + /* + * @shards is from chk_query_remote RPC reply, the buffer will be released after + * the RPC done. Let's copy all related data to new the buffer for further using. + */ + rc = chk_leader_dup_shard(&shard, &shards[i]); + if (rc != 0) + goto out; + + rc = chk_pool_add_shard(cqa->cqa_hdl, &cqa->cqa_list, shard->cqps_uuid, + shard->cqps_rank, NULL, cqa->cqa_ins, &cqa->cqa_count, + shard, chk_leader_free_shard, NULL); + if (rc != 0) { + chk_leader_free_shard(shard); + goto out; + } + } + +out: + if (rc != 0) + D_ERROR(DF_LEADER" failed to handle query CB with result %d: " + DF_RC"\n", DP_LEADER(cqa->cqa_ins), cb_args->cb_result, DP_RC(rc)); + + return rc; +} + +static struct chk_query_args * +chk_cqa_alloc(struct chk_instance *ins) +{ + struct umem_attr uma = { 0 }; + struct chk_query_args *cqa; + int rc; + + D_ALLOC_PTR(cqa); + if (cqa == NULL) + goto out; + + D_INIT_LIST_HEAD(&cqa->cqa_list); + cqa->cqa_ins = ins; + + uma.uma_id = UMEM_CLASS_VMEM; + rc = dbtree_create_inplace(DBTREE_CLASS_CHK_POOL, 0, CHK_BTREE_ORDER, &uma, + &cqa->cqa_btr, &cqa->cqa_hdl); + if (rc != 0) + D_FREE(cqa); + +out: + return cqa; +} + +static void +chk_cqa_free(struct chk_query_args *cqa) +{ + if (cqa != NULL) { + dbtree_destroy(cqa->cqa_hdl, NULL); + D_FREE(cqa); + } +} + +int +chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, + chk_query_pool_cb_t pool_cb, void *buf) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_query_args *cqa = NULL; + struct chk_pool_rec *cpr; + struct chk_pool_rec *tmp; + struct chk_pool_shard *cps; + struct chk_query_pool_shard *shard; + d_iov_t kiov; + d_iov_t riov; + uint64_t gen = cbk->cb_gen; + uint32_t status; + uint32_t phase; + uint32_t idx = 0; + int rc; + int i; + bool skip; + + /* + * NOTE: Similar as stop case, we need the 
ability to query check information from + * new leader if the old one dead. But the information from new leader may be + * not very accurate. It is the control plane duty to send the CHK_QUERY dRPC + * to the right one. + */ + + if (ins->ci_ranks == NULL) { + rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); + /* We do not know the rank list, the sponsor needs to choose another leader. */ + if (rc == -DER_NONEXIST) + D_GOTO(out, rc = -DER_NOTLEADER); + + if (rc != 0) + goto out; + + if (unlikely(ins->ci_ranks == NULL)) + D_GOTO(out, rc = -DER_NOTLEADER); + } + + cqa = chk_cqa_alloc(ins); + if (cqa == NULL) + D_GOTO(out, rc = -DER_NOMEM); + +again: + rc = chk_query_remote(ins->ci_ranks, gen, pool_nr, pools, chk_leader_query_cb, cqa); + if (rc != 0) { + if (rc == -DER_OOG || rc == -DER_GRPVER || rc == -DER_AGAIN) { + D_INFO(DF_LEADER" Someone is not ready %d, let's retry query after 1 sec\n", + DP_LEADER(ins), rc); + if (!d_list_empty(&cqa->cqa_list)) { + chk_cqa_free(cqa); + cqa = chk_cqa_alloc(ins); + if (cqa == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + dss_sleep(1000); + goto again; + } + + goto out; + } + + d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { + /* + * For non-dangling pool, the check engine will return related pool shards + * information, and then merge with the check leader owned information via + * the subsequent chk_pool_merge_status(). 
+ */ + if (!cpr->cpr_dangling) + continue; + + skip = false; + if (pool_nr != 0) { + skip = true; + for (i = 0; i < pool_nr && skip; i++) { + if (uuid_compare(cpr->cpr_uuid, pools[i]) == 0) + skip = false; + } + } + + if (!skip) { + D_ALLOC_PTR(shard); + if (shard == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + uuid_copy(shard->cqps_uuid, cpr->cpr_uuid); + shard->cqps_status = cpr->cpr_bk.cb_pool_status; + shard->cqps_phase = cpr->cpr_bk.cb_phase; + shard->cqps_rank = CHK_LEADER_RANK; + + rc = chk_pool_add_shard(cqa->cqa_hdl, &cqa->cqa_list, cpr->cpr_uuid, + CHK_LEADER_RANK, NULL, ins, &cqa->cqa_count, + shard, chk_leader_free_shard, NULL); + if (rc != 0) + goto out; + } + } + + status = cbk->cb_ins_status; + phase = cbk->cb_phase; + chk_ins_merge_info(&status, cqa->cqa_ins_status, &phase, cqa->cqa_ins_phase, &gen, + cqa->cqa_gen); + rc = head_cb(status, phase, &cbk->cb_statistics, &cbk->cb_time, cqa->cqa_count, buf); + if (rc != 0) + goto out; + + d_list_for_each_entry(cpr, &cqa->cqa_list, cpr_link) { + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, cpr->cpr_uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (likely(rc == 0)) + tmp = (struct chk_pool_rec *)riov.iov_buf; + else + tmp = NULL; + + d_list_for_each_entry(cps, &cpr->cpr_shard_list, cps_link) { + shard = cps->cps_data; + + /* + * NOTE: The pool status on different engines may be different. For example: + * the PS leader may be in PENDING because of interaction, but others + * are still in running status. We summarize the status for the query + * result to avoid confusing. It is just temporary solution, and will + * be moved to control plane in the future - DAOS-13989. 
+ */ + if (cps->cps_rank != CHK_LEADER_RANK && tmp != NULL) { + shard->cqps_status = chk_pool_merge_status(shard->cqps_status, + tmp->cpr_bk.cb_pool_status); + if (shard->cqps_phase < tmp->cpr_bk.cb_phase) + shard->cqps_phase = tmp->cpr_bk.cb_phase; + } + + rc = pool_cb(shard, idx++, buf); + if (rc != 0) + goto out; + + D_ASSERT(cqa->cqa_count >= idx); + } + } +out: + chk_cqa_free(cqa); + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Leader query check with gen "DF_X64" for %d pools: "DF_RC"\n", + gen, pool_nr, DP_RC(rc)); + + return rc; +} + +int +chk_leader_prop(chk_prop_cb_t prop_cb, void *buf) +{ + struct chk_property *prop = &chk_leader->ci_prop; + + return prop_cb(buf, (struct chk_policy *)prop->cp_policies, + CHK_POLICY_MAX - 1, prop->cp_flags); +} + +static int +chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act, bool for_all, + bool locked, uint32_t *cla) +{ + struct chk_pending_rec *pending = NULL; + struct chk_pool_rec *pool = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; + + rc = chk_pending_del(ins, seq, locked, &pending); + if (rc != 0) + goto out; + + D_ASSERT(pending->cpr_busy); + + if (pending->cpr_on_leader) { + ABT_mutex_lock(pending->cpr_mutex); + /* + * It is the control plane's duty to guarantee that the decision is a valid + * action from the report options. Otherwise, related inconsistency will be ignored. 
+ */ + pending->cpr_action = act; + ABT_cond_broadcast(pending->cpr_cond); + ABT_mutex_unlock(pending->cpr_mutex); + + if (cla != NULL) + *cla = pending->cpr_class; + } else { + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc == 0) { + pool = (struct chk_pool_rec *)riov.iov_buf; + if (pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + } else { + rc = 0; + } + + /* For locked case, check engines have already processed related interaction. */ + if (!locked) + rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, + pending->cpr_class, act, pending->cpr_rank, for_all); + + chk_pending_destroy(pending); + } + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" takes action for report with seq "DF_X64", action %u: "DF_RC"\n", + DP_LEADER(ins), seq, act, DP_RC(rc)); + + return rc; +} + +int +chk_leader_act(uint64_t seq, uint32_t act, bool for_all) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_property *prop = &ins->ci_prop; + struct chk_pool_rec *pool = NULL; + struct chk_pool_rec *pool_tmp = NULL; + struct chk_pending_rec *cpr = NULL; + struct chk_pending_rec *cpr_tmp = NULL; + uint32_t cla = 0; + int rc; + + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) + D_GOTO(out, rc = -DER_NOTLEADER); + + /* Tell control plane that no check instance is running via "-DER_NOTAPPLICABLE". */ + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + /* The admin may input the wrong option, not acceptable. 
*/ + if (unlikely(act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { + D_ERROR("%u is not acceptable for interaction decision.\n", act); + D_GOTO(out, rc = -DER_INVAL); + } + + rc = chk_leader_act_internal(ins, seq, act, for_all, false, &cla); + if (rc != 0 || !for_all) + goto out; + + if (likely(prop->cp_policies[cla] != act)) { + prop->cp_policies[cla] = act; + chk_prop_update(prop, NULL); + } + + /* + * Hold reference on each to guarantee that the next 'tmp' will not be unlinked from the + * pool list during current pool process. + */ + d_list_for_each_entry(pool, &ins->ci_pool_list, cpr_link) + chk_pool_get(pool); + + d_list_for_each_entry_safe(pool, pool_tmp, &ins->ci_pool_list, cpr_link) { + if (rc == 0) { + ABT_rwlock_wrlock(ins->ci_abt_lock); + d_list_for_each_entry_safe(cpr, cpr_tmp, &pool->cpr_pending_list, + cpr_pool_link) { + if (cpr->cpr_class != cla || + cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) + continue; + + rc = chk_leader_act_internal(ins, cpr->cpr_seq, act, false, true, + NULL); + if (rc != 0) + break; + } + ABT_rwlock_unlock(ins->ci_abt_lock); + } + chk_pool_put(pool); + } + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" takes action for report with seq "DF_X64", action %u, flags %s: %d\n", + DP_LEADER(ins), seq, act, for_all ? "all" : "once", rc); + + return rc; +} + +/* + * \return Positive value if interaction is interrupted, such as check stop. + * Zero on success. + * Negative value if error. + */ +int +chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + struct chk_rank_rec *crr = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; + + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) + D_GOTO(out, rc = -DER_NOTLEADER); + + /* Tell check engine that check leader is not running via "-DER_NOTAPPLICABLE". 
*/ + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (*seq == 0) { + +new_seq: + *seq = chk_report_seq_gen(ins); + } + + D_INFO(DF_LEADER" handle %s report from rank %u with seq " + DF_X64" class %u, action %u, result %d\n", DP_LEADER(ins), + decision != NULL ? "local" : "remote", cru->cru_rank, *seq, cru->cru_cla, + cru->cru_act, cru->cru_result); + + if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + if (cru->cru_pool == NULL) + D_GOTO(log, rc = -DER_INVAL); + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0) + goto log; + + pool = (struct chk_pool_rec *)riov.iov_buf; + + if (decision == NULL) { + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &cru->cru_rank, sizeof(cru->cru_rank)); + rc = dbtree_lookup(ins->ci_rank_hdl, &kiov, &riov); + if (rc != 0) + goto log; + + crr = (struct chk_rank_rec *)riov.iov_buf; + } + + rc = chk_pending_add(ins, &pool->cpr_pending_list, + crr != NULL ? &crr->crr_pending_list : NULL, + *cru->cru_pool, *seq, cru->cru_rank, cru->cru_cla, &cpr); + if (decision != NULL) { + if (unlikely(rc == -DER_AGAIN)) + goto new_seq; + + cpr->cpr_on_leader = 1; + } + + if (rc != 0) + goto log; + } + + rc = chk_report_upcall(cru->cru_gen, *seq, cru->cru_cla, cru->cru_act, cru->cru_result, + cru->cru_rank, cru->cru_target, cru->cru_pool, cru->cru_pool_label, + cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, + cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, + cru->cru_detail_nr, cru->cru_details); + /* Check cpr->cpr_action for the case of "dmg check repair" by race. 
*/ + if (rc == 0 && pool != NULL && + likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; + +log: + if (rc != 0) { + D_ERROR(DF_LEADER" failed to handle %s report from rank %u with seq " + DF_X64", class %u, action %u, handle_rc %d, report_rc %d\n", + DP_LEADER(ins), decision != NULL ? "local" : "remote", cru->cru_rank, *seq, + cru->cru_cla, cru->cru_act, cru->cru_result, rc); + goto out; + } + + if (decision == NULL || cpr == NULL) + goto out; + + D_ASSERT(cpr->cpr_busy); + + D_INFO(DF_LEADER" need interaction for class %u with seq "DF_X64"\n", + DP_LEADER(ins), cru->cru_cla, *seq); + + ABT_mutex_lock(cpr->cpr_mutex); + +again: + if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + *decision = cpr->cpr_action; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { + rc = 1; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex); + + goto again; + +out: + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + (rc != 0 || (cpr != NULL && + cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + + if ((rc != 0 || decision != NULL) && cpr != NULL) + chk_pending_destroy(cpr); + + return rc; +} + +int +chk_leader_notify(struct chk_iv *iv) +{ + struct chk_instance *ins = chk_leader; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_rank_bundle rbund = { 0 }; + d_iov_t kiov; + d_iov_t riov; + int rc = 0; + + /* Ignore the notification that is not applicable to current rank. 
*/ + + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (cbk->cb_gen != iv->ci_gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (!uuid_is_null(iv->ci_uuid)) { + rc = chk_pool_handle_notify(ins, iv); + goto out; + } + + switch (iv->ci_ins_status) { + case CHK__CHECK_INST_STATUS__CIS_INIT: + case CHK__CHECK_INST_STATUS__CIS_STOPPED: + case CHK__CHECK_INST_STATUS__CIS_PAUSED: + case CHK__CHECK_INST_STATUS__CIS_IMPLICATED: + /* Directly ignore above. */ + break; + case CHK__CHECK_INST_STATUS__CIS_RUNNING: + if (unlikely(iv->ci_phase < cbk->cb_phase)) { + rc = -DER_NOTAPPLICABLE; + } else if (iv->ci_phase != cbk->cb_phase) { + rbund.crb_rank = iv->ci_rank; + rbund.crb_phase = iv->ci_phase; + rbund.crb_ins = ins; + + d_iov_set(&riov, &rbund, sizeof(rbund)); + d_iov_set(&kiov, &iv->ci_rank, sizeof(iv->ci_rank)); + rc = dbtree_update(ins->ci_rank_hdl, &kiov, &riov); + } + break; + case CHK__CHECK_INST_STATUS__CIS_COMPLETED: + /* + * NOTE: Currently, we do not support to partial check till the specified phase. + * Then the completed phase will be either container cleanup or all done. + */ + if (unlikely(iv->ci_phase != CHK__CHECK_SCAN_PHASE__CSP_CONT_CLEANUP && + iv->ci_phase != CHK__CHECK_SCAN_PHASE__CSP_DONE)) + rc = -DER_INVAL; + else + rc = chk_rank_del(ins, iv->ci_rank); + break; + case CHK__CHECK_INST_STATUS__CIS_FAILED: + chk_ins_set_fail(ins, iv->ci_phase); + rc = chk_rank_del(ins, iv->ci_rank); + if (rc != 0 || !(prop->cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) + D_GOTO(out, rc = (rc == -DER_NONEXIST || rc == -DER_NO_HDL ? 
0 : rc)); + + ins->ci_implicated = 1; + chk_stop_sched(ins); + break; + default: + rc = -DER_INVAL; + break; + } + +out: + D_CDEBUG(rc != 0 && rc != -DER_NOTAPPLICABLE, DLOG_ERR, DLOG_INFO, + DF_LEADER" handle notification from rank %u, for pool " + DF_UUIDF", phase %u, ins_status %u, pool_status %u, gen "DF_X64", seq "DF_X64 + ": "DF_RC"\n", DP_LEADER(ins), iv->ci_rank, DP_UUID(iv->ci_uuid), iv->ci_phase, + iv->ci_ins_status, iv->ci_pool_status, iv->ci_gen, iv->ci_seq, DP_RC(rc)); + + return rc == -DER_NOTAPPLICABLE ? 0 : rc; +} + +int +chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, int *pool_nr, + uuid_t **pools) +{ + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + int rc = 0; + + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) + D_GOTO(out, rc = -DER_NOTLEADER); + + if (uuid_compare(cbk->cb_iv_uuid, iv_uuid)) + D_GOTO(out, rc = -DER_STALE); + + if (cbk->cb_gen != gen) + D_GOTO(out, rc = -DER_STALE); + + if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) + D_GOTO(out, rc = -DER_SHUTDOWN); + + /* The rank has been excluded from (or never been part of) the check instance. 
*/ + if (!chk_rank_in_list(ins->ci_ranks, rank)) + D_GOTO(out, rc = -DER_NO_PERM); + + if (ins->ci_orphan_done) + *flags = CRF_ORPHAN_DONE; + + rc = chk_leader_pools2list(ins, pool_nr, pools); + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" %u handle rejoin from rank %u, gen "DF_X64", iv "DF_UUIDF":"DF_RC"\n", + DP_LEADER(ins), cbk->cb_ins_status, rank, gen, DP_UUID(iv_uuid), DP_RC(rc)); + + return rc; +} + +void +chk_leader_pause(void) +{ + struct chk_instance *ins = chk_leader; + + chk_stop_sched(ins); + D_ASSERT(d_list_empty(&ins->ci_rank_list)); +} + +static void +chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src, + enum crt_event_type type, void *arg) +{ + struct chk_instance *ins = chk_leader; + struct chk_dead_rank *cdr = NULL; + int rc = 0; + + /* Ignore the event that is not applicable to current rank. */ + + if (src != CRT_EVS_SWIM) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (type != CRT_EVT_DEAD && type != CRT_EVT_ALIVE) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (!ins->ci_sched_running) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + if (type == CRT_EVT_DEAD) { + D_ALLOC_PTR(cdr); + if (cdr == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + cdr->cdr_rank = rank; + } + + ABT_mutex_lock(ins->ci_abt_mutex); + if (cdr != NULL) { + /* + * The event may be triggered on non-system SX. Let's notify the leader scheduler + * to handle that on system XS. + */ + d_list_add_tail(&cdr->cdr_link, &ins->ci_dead_ranks); + } else { + /* Remove former non-handled dead rank from the list. */ + d_list_for_each_entry(cdr, &ins->ci_dead_ranks, cdr_link) { + if (cdr->cdr_rank == rank) { + d_list_del(&cdr->cdr_link); + D_FREE(cdr); + break; + } + } + } + ABT_mutex_unlock(ins->ci_abt_mutex); + +out: + if (rc != -DER_NOTAPPLICABLE) + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_LEADER" receive rank %u %s event: "DF_RC"\n", + DP_LEADER(ins), rank, type == CRT_EVT_DEAD ? 
"dead" : "alive", DP_RC(rc)); +} + +int +chk_leader_init(void) +{ + struct chk_traverse_pools_args ctpa = { 0 }; + struct chk_bookmark *cbk; + int rc; + + rc = chk_ins_init(&chk_leader); + if (rc != 0) + goto fini; + + chk_leader->ci_is_leader = 1; + chk_report_seq_init(chk_leader); + + /* + * DAOS global consistency check depends on all related engines' local + * consistency. If hit some local data corruption, then it is possible + * that local consistency is not guaranteed. Need to break and resolve + * related local inconsistency firstly. + */ + + cbk = &chk_leader->ci_bk; + rc = chk_bk_fetch_leader(cbk); + if (rc == -DER_NONEXIST) + goto prop; + + /* It may be caused by local data corruption, let's break. */ + if (rc != 0) + goto fini; + + /* + * NOTE: The unknown magic may be caused by data corruption, also + * may for downgrade case. If we downgraded from new layout, + * we do not understand it. Under such case, reporting it as + * -DER_IO may be not the best choice, but it is better than + * damaging the system if we modify something with old logic. + * + * On the other hand, if we have to start old DAOS check on + * new layout, then please manually remove chk bookmark and + * property KVs from sys_db via DDB tools firstly. + */ + if (cbk->cb_magic != 0 && cbk->cb_magic != CHK_BK_MAGIC_LEADER) { + D_ERROR("Hit corrupted leader bookmark on rank %u: %u vs %u\n", + dss_self_rank(), cbk->cb_magic, CHK_BK_MAGIC_LEADER); + D_GOTO(fini, rc = -DER_IO); + } + + if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { + /* + * Leader crashed before normally exit, reset the status as 'PAUSED' + * to avoid blocking next CHK_START. 
+ */ + cbk->cb_ins_status = CHK__CHECK_INST_STATUS__CIS_PAUSED; + cbk->cb_time.ct_stop_time = time(NULL); + rc = chk_bk_update_leader(cbk); + if (rc != 0) { + D_ERROR(DF_LEADER" failed to reset ins status as 'PAUSED': "DF_RC"\n", + DP_LEADER(chk_leader), DP_RC(rc)); + goto fini; + } + + ctpa.ctpa_gen = cbk->cb_gen; + rc = chk_traverse_pools(chk_pools_pause_cb, &ctpa); + /* + * Failed to reset pool status will not affect next check start, so it is not fatal, + * but related check query result may be confused for user. + */ + if (rc != 0) + D_WARN(DF_LEADER" failed to reset pools status as 'PAUSED': "DF_RC"\n", + DP_LEADER(chk_leader), DP_RC(rc)); + } + +prop: + rc = chk_prop_fetch(&chk_leader->ci_prop, &chk_leader->ci_ranks); + if (rc == 0 || rc == -DER_NONEXIST) + rc = crt_register_event_cb(chk_rank_event_cb, NULL); +fini: + if (rc != 0) + chk_ins_fini(&chk_leader); + else + chk_leader->ci_inited = 1; + return rc; +} + +void +chk_leader_fini(void) +{ + crt_unregister_event_cb(chk_rank_event_cb, NULL); + chk_ins_fini(&chk_leader); +} diff --git a/src/chk/chk_rpc.c b/src/chk/chk_rpc.c new file mode 100644 index 00000000000..71bcee72e13 --- /dev/null +++ b/src/chk/chk_rpc.c @@ -0,0 +1,1363 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. 
/*
 * Aggregate the check start reply of @source into the reply of @result.
 *
 * The pool clues and the ranks listed in cso_cmp_ranks of @source are appended
 * to the corresponding arrays of @result, which are grown on demand.
 *
 * \param[in]	  source	The RPC whose reply is to be merged.
 * \param[in,out] result	The RPC that accumulates the merged reply.
 * \param[in]	  priv		Not used.
 *
 * \return	Zero on success, including the case that @source carries a failure
 *		status (which is recorded in @result if it has none yet).
 *		-DER_NOMEM if an array of @result cannot be grown.
 */
static int
chk_start_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
{
	struct chk_start_in	*in_source = crt_req_get(source);
	struct chk_start_out	*out_source = crt_reply_get(source);
	struct chk_start_out	*out_result = crt_reply_get(result);
	struct ds_pool_clue	*clues;
	d_rank_t		*ranks;
	uint32_t		 cap;
	uint32_t		 nr;
	int			 i;

	/* Keep the first failure seen; nothing is merged from a failed reply. */
	if (out_source->cso_status < 0) {
		D_ERROR("Failed to check start with gen "DF_X64": %d\n",
			in_source->csi_gen, out_source->cso_status);

		if (out_result->cso_status == 0)
			out_result->cso_status = out_source->cso_status;

		return 0;
	}

	if (out_source->cso_clues.ca_count == 0)
		goto cmp_ranks;

	nr = out_source->cso_clues.ca_count + out_result->cso_clues.ca_count;
	if (nr > out_result->cso_clue_cap) {
		/* Double the capacity until it can hold all the clues. */
		cap = out_result->cso_clue_cap > 0 ? out_result->cso_clue_cap : 1;
		while (cap < nr)
			cap <<= 1;

clue_again:
		D_REALLOC_ARRAY(clues, out_result->cso_clues.ca_arrays,
				out_result->cso_clue_cap, cap);
		if (clues == NULL) {
			/* Retry once with the exact size before giving up. */
			if (cap > nr) {
				cap = nr;
				goto clue_again;
			}

			if (out_result->cso_status == 0)
				out_result->cso_status = -DER_NOMEM;

			return -DER_NOMEM;
		}

		out_result->cso_clues.ca_arrays = clues;
		out_result->cso_clue_cap = cap;
	}

	/* Append the clues of @source after the ones already held by @result. */
	memcpy((struct ds_pool_clue *)out_result->cso_clues.ca_arrays +
	       out_result->cso_clues.ca_count, out_source->cso_clues.ca_arrays,
	       sizeof(*clues) * out_source->cso_clues.ca_count);
	out_result->cso_clues.ca_count = nr;

	/*
	 * pc_svc_clue/pc_label/pc_tgt_status are shared between out_source and out_result.
	 * Let's reset them in out_source to avoid being released when cleanup out_source.
	 */
	for (i = 0, clues = out_source->cso_clues.ca_arrays;
	     i < out_source->cso_clues.ca_count; i++, clues++) {
		clues->pc_label_len = 0;
		clues->pc_tgt_nr = 0;
		clues->pc_svc_clue = NULL;
		clues->pc_label = NULL;
		clues->pc_tgt_status = NULL;
	}

cmp_ranks:
	if (out_source->cso_cmp_ranks.ca_count == 0)
		return 0;

	nr = out_source->cso_cmp_ranks.ca_count + out_result->cso_cmp_ranks.ca_count;
	if (nr > out_result->cso_rank_cap) {
		/* Same growth policy as for the clues above. */
		cap = out_result->cso_rank_cap > 0 ? out_result->cso_rank_cap : 1;
		while (cap < nr)
			cap <<= 1;

rank_again:
		D_REALLOC_ARRAY(ranks, out_result->cso_cmp_ranks.ca_arrays,
				out_result->cso_rank_cap, cap);
		if (ranks == NULL) {
			/* Retry once with the exact size before giving up. */
			if (cap > nr) {
				cap = nr;
				goto rank_again;
			}

			if (out_result->cso_status == 0)
				out_result->cso_status = -DER_NOMEM;

			return -DER_NOMEM;
		}

		out_result->cso_cmp_ranks.ca_arrays = ranks;
		out_result->cso_rank_cap = cap;
	}

	/* Append the ranks of @source after the ones already held by @result. */
	memcpy((d_rank_t *)out_result->cso_cmp_ranks.ca_arrays + out_result->cso_cmp_ranks.ca_count,
	       out_source->cso_cmp_ranks.ca_arrays,
	       sizeof(*ranks) * out_source->cso_cmp_ranks.ca_count);
	out_result->cso_cmp_ranks.ca_count = nr;

	return 0;
}
out_result->cso_cap : 1;
+		while (cap < nr)
+			cap <<= 1;
+
+again:
+		D_REALLOC_ARRAY(ranks, out_result->cso_ranks.ca_arrays, out_result->cso_cap, cap);
+		if (ranks == NULL) {
+			if (cap > nr) { /* fall back to the exact size before giving up */
+				cap = nr;
+				goto again;
+			}
+
+			if (out_result->cso_status == 0)
+				out_result->cso_status = -DER_NOMEM;
+
+			return -DER_NOMEM;
+		}
+
+		out_result->cso_ranks.ca_arrays = ranks;
+		out_result->cso_cap = cap;
+	}
+
+	memcpy((d_rank_t *)out_result->cso_ranks.ca_arrays + out_result->cso_ranks.ca_count,
+	       out_source->cso_ranks.ca_arrays, sizeof(*ranks) * out_source->cso_ranks.ca_count);
+	out_result->cso_ranks.ca_count = nr;
+
+	return 0;
+}
+/* Post-reply callback of chk_stop_co_ops: free the rank array of the reply of @rpc. */
+static int
+chk_stop_post_reply(crt_rpc_t *rpc, void *arg)
+{
+	struct chk_stop_out	*cso = crt_reply_get(rpc);
+
+	if (cso != NULL)
+		D_FREE(cso->cso_ranks.ca_arrays);
+
+	return 0;
+}
+/*
+ * Aggregate callback of chk_query_co_ops: merge the check-query reply of @source
+ * into @result. A non-zero source status is logged and recorded in @result if it has
+ * no error yet. Otherwise the source shards are appended to the result shard array
+ * (grown on demand) and the instance status/phase/gen are combined through
+ * chk_ins_merge_info().
+ *
+ * NOTE(review): chk_ins_merge_info() is only reached when @source carries at least
+ * one shard; confirm that skipping it for shard-less replies is intended.
+ *
+ * Returns 0, or -DER_NOMEM when the shard array cannot be grown.
+ */
+static int
+chk_query_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
+{
+	struct chk_query_in		*in_source = crt_req_get(source);
+	struct chk_query_out		*out_source = crt_reply_get(source);
+	struct chk_query_out		*out_result = crt_reply_get(result);
+	struct chk_query_pool_shard	*shards;
+	uint32_t			 cap;
+	uint32_t			 nr;
+	int				 i;
+
+	if (out_source->cqo_status != 0) {
+		D_ERROR("Failed to check query with gen "DF_X64": %d\n",
+			in_source->cqi_gen, out_source->cqo_status);
+
+		if (out_result->cqo_status == 0)
+			out_result->cqo_status = out_source->cqo_status;
+
+		return 0;
+	}
+
+	if (out_source->cqo_shards.ca_count == 0)
+		return 0;
+
+	nr = out_source->cqo_shards.ca_count + out_result->cqo_shards.ca_count;
+	if (nr >out_result->cqo_cap) {
+		cap = out_result->cqo_cap > 0 ? out_result->cqo_cap : 1;
+		while (cap < nr)
+			cap <<= 1;
+
+again:
+		D_REALLOC_ARRAY(shards, out_result->cqo_shards.ca_arrays, out_result->cqo_cap, cap);
+		if (shards == NULL) {
+			if (cap > nr) { /* fall back to the exact size before giving up */
+				cap = nr;
+				goto again;
+			}
+
+			if (out_result->cqo_status == 0)
+				out_result->cqo_status = -DER_NOMEM;
+
+			return -DER_NOMEM;
+		}
+
+		out_result->cqo_shards.ca_arrays = shards;
+		out_result->cqo_cap = cap;
+	}
+
+	memcpy((struct chk_query_pool_shard *)out_result->cqo_shards.ca_arrays +
+	       out_result->cqo_shards.ca_count, out_source->cqo_shards.ca_arrays,
+	       sizeof(*shards) * out_source->cqo_shards.ca_count);
+	out_result->cqo_shards.ca_count = nr;
+
+	chk_ins_merge_info(&out_result->cqo_ins_status, out_source->cqo_ins_status,
+			   &out_result->cqo_ins_phase, out_source->cqo_ins_phase,
+			   &out_result->cqo_gen, out_source->cqo_gen);
+
+	/*
+	 * cqps_target_nr and cqps_targets are shared between out_source and out_result.
+	 * Let's reset them in out_source to avoid being released when cleanup out_source.
+	 */
+	for (i = 0, shards = out_source->cqo_shards.ca_arrays;
+	     i < out_source->cqo_shards.ca_count; i++, shards++) {
+		shards->cqps_target_nr = 0;
+		shards->cqps_targets = NULL;
+	}
+
+	return 0;
+}
+/* Post-reply callback of chk_query_co_ops: release the reply shards via chk_query_free(). */
+static int
+chk_query_post_reply(crt_rpc_t *rpc, void *arg)
+{
+	struct chk_query_out	*cqo = crt_reply_get(rpc);
+
+	if (cqo != NULL)
+		chk_query_free(cqo->cqo_shards.ca_arrays, cqo->cqo_shards.ca_count);
+
+	return 0;
+}
+/*
+ * Aggregate callback of chk_mark_co_ops: log a non-zero status from @source and keep
+ * it in @result if @result has no error yet. Always returns 0.
+ */
+static int
+chk_mark_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
+{
+	struct chk_mark_in	*in_source = crt_req_get(source);
+	struct chk_mark_out	*out_source = crt_reply_get(source);
+	struct chk_mark_out	*out_result = crt_reply_get(result);
+
+	if (out_source->cmo_status != 0) {
+		D_ERROR("Failed to check mark rank dead with gen "DF_X64": "DF_RC"\n",
+			in_source->cmi_gen, DP_RC(out_source->cmo_status));
+
+		if (out_result->cmo_status == 0)
+			out_result->cmo_status = out_source->cmo_status;
+	}
+
+	return 0;
+}
+/*
+ * Aggregate callback of chk_act_co_ops: log a non-zero status from @source and keep
+ * it in @result if @result has no error yet. Always returns 0.
+ */
+static int
+chk_act_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
+{
+	struct chk_act_in	*in_source = crt_req_get(source);
+	struct chk_act_out	*out_source = crt_reply_get(source);
+	struct chk_act_out	*out_result = crt_reply_get(result);
+
+	if (out_source->cao_status != 0) {
+		D_ERROR("Failed to check act with gen "DF_X64": "DF_RC"\n",
+			in_source->cai_gen, DP_RC(out_source->cao_status));
+
+		if (out_result->cao_status == 0)
+			out_result->cao_status = out_source->cao_status;
+	}
+
+	return 0;
+}
+/*
+ * Aggregate callback of chk_cont_list_co_ops: merge the container UUIDs replied by
+ * @source into the UUID array of @result, growing that array on demand. A negative
+ * source status is logged and recorded in @result if it has no error yet.
+ */
+static int
+chk_cont_list_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
+{
+	struct chk_cont_list_in		*in_source = crt_req_get(source);
+	struct chk_cont_list_out	*out_source = crt_reply_get(source);
+	struct chk_cont_list_out	*out_result = crt_reply_get(result);
+	uuid_t				*uuids;
+	uint32_t			 cap;
+	uint32_t			 nr;
+
+	if (out_source->cclo_status < 0) {
+		D_ERROR("Failed to check cont list with gen "DF_X64": %d\n",
+			in_source->ccli_gen, out_source->cclo_status);
+
+		if (out_result->cclo_status == 0)
+			out_result->cclo_status = out_source->cclo_status;
+
+		return 0;
+	}
+
+	if (out_source->cclo_conts.ca_count == 0)
+		return 0;
+
+	nr = out_source->cclo_conts.ca_count + out_result->cclo_conts.ca_count;
+	if (nr > out_result->cclo_cap) {
+		cap = out_result->cclo_cap > 0 ? out_result->cclo_cap : 1;
+		while (cap < nr)
+			cap <<= 1;
+
+again:
+		D_REALLOC_ARRAY(uuids, out_result->cclo_conts.ca_arrays, out_result->cclo_cap, cap);
+		if (uuids == NULL) {
+			if (cap > nr) { /* fall back to the exact size before giving up */
+				cap = nr;
+				goto again;
+			}
+
+			if (out_result->cclo_status == 0)
+				out_result->cclo_status = -DER_NOMEM;
+
+			return -DER_NOMEM;
+		}
+
+		out_result->cclo_conts.ca_arrays = uuids;
+		out_result->cclo_cap = cap;
+	}
+
+	memcpy((uuid_t *)out_result->cclo_conts.ca_arrays + out_result->cclo_conts.ca_count,
+	       out_source->cclo_conts.ca_arrays, sizeof(uuid_t) * out_source->cclo_conts.ca_count);
+	out_result->cclo_conts.ca_count = nr;
+
+	return 0;
+}
+/* Post-reply callback of chk_cont_list_co_ops: free the UUID array of the reply of @rpc. */
+static int
+chk_cont_list_post_reply(crt_rpc_t *rpc, void *arg)
+{
+	struct chk_cont_list_out	*cclo = crt_reply_get(rpc);
+
+	if (cclo != NULL)
+		D_FREE(cclo->cclo_conts.ca_arrays);
+
+	return 0;
+}
+/*
+ * Aggregate callback of chk_pool_start_co_ops: log a source status that is neither 0
+ * nor -DER_NONEXIST and keep it in @result if @result has no error yet.
+ * Always returns 0.
+ */
+static int
+chk_pool_start_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv)
+{
+	struct chk_pool_start_in	*in_source = crt_req_get(source);
+	struct chk_pool_start_out	*out_source = crt_reply_get(source);
+	struct chk_pool_start_out	*out_result = crt_reply_get(result);
+
+	if (out_source->cpso_status != 0 && out_source->cpso_status != -DER_NONEXIST) {
+		D_ERROR("Failed to pool start with gen "DF_X64" on rank %u: "DF_RC"\n",
+			in_source->cpsi_gen, out_source->cpso_rank, DP_RC(out_source->cpso_status));
+
+		if (out_result->cpso_status == 0)
+			out_result->cpso_status = out_source->cpso_status;
+	}
+
+	return 0;
+}
+/* Collective ops for the check-start RPC: aggregation plus reply-buffer cleanup. */
+struct crt_corpc_ops chk_start_co_ops = {
+	.co_aggregate	= chk_start_aggregator,
+	.co_pre_forward	= NULL,
+	.co_post_reply	= chk_start_post_reply,
+};
+/* Collective ops for the check-stop RPC: aggregation plus reply-buffer cleanup. */
+struct crt_corpc_ops chk_stop_co_ops = {
+	.co_aggregate	= chk_stop_aggregator,
+
.co_pre_forward = NULL, + .co_post_reply = chk_stop_post_reply, +}; + +struct crt_corpc_ops chk_query_co_ops = { + .co_aggregate = chk_query_aggregator, + .co_pre_forward = NULL, + .co_post_reply = chk_query_post_reply, +}; + +struct crt_corpc_ops chk_mark_co_ops = { + .co_aggregate = chk_mark_aggregator, + .co_pre_forward = NULL, +}; + +struct crt_corpc_ops chk_act_co_ops = { + .co_aggregate = chk_act_aggregator, + .co_pre_forward = NULL, +}; + +struct crt_corpc_ops chk_cont_list_co_ops = { + .co_aggregate = chk_cont_list_aggregator, + .co_pre_forward = NULL, + .co_post_reply = chk_cont_list_post_reply, +}; + +struct crt_corpc_ops chk_pool_start_co_ops = { + .co_aggregate = chk_pool_start_aggregator, + .co_pre_forward = NULL, +}; + +static inline int +chk_co_rpc_prepare(d_rank_list_t *rank_list, crt_opcode_t opc, crt_rpc_t **req, bool failout) +{ + uint32_t flags = CRT_RPC_FLAG_FILTER_INVERT; + + if (failout) + flags |= CRT_RPC_FLAG_CO_FAILOUT; + + return crt_corpc_req_create(dss_get_module_info()->dmi_ctx, NULL, rank_list, + DAOS_RPC_OPCODE(opc, DAOS_CHK_MODULE, DAOS_CHK_VERSION), + NULL, NULL, flags, crt_tree_topo(CRT_TREE_KNOMIAL, 32), req); +} + +static inline int +chk_sg_rpc_prepare(d_rank_t rank, crt_opcode_t opc, crt_rpc_t **req) +{ + crt_endpoint_t tgt_ep; + + tgt_ep.ep_grp = NULL; + tgt_ep.ep_rank = rank; + tgt_ep.ep_tag = daos_rpc_tag(DAOS_REQ_CHK, 0); + opc = DAOS_RPC_OPCODE(opc, DAOS_CHK_MODULE, DAOS_CHK_VERSION); + + return crt_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep, opc, req); +} + +int +chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, + uuid_t pools[], uint32_t api_flags, int phase, d_rank_t leader, uint32_t flags, + uuid_t iv_uuid, chk_co_rpc_cb_t start_cb, void *args) +{ + struct chk_co_rpc_cb_args cb_args = { 0 }; + crt_rpc_t *req = NULL; + struct chk_start_in *csi = NULL; + struct chk_start_out *cso = NULL; + d_rank_t *cmp_rank; 
+ int rc; + int rc1; + int i; + + rc = chk_co_rpc_prepare(rank_list, CHK_START, &req, true); + if (rc != 0) + goto out; + + csi = crt_req_get(req); + csi->csi_gen = gen; + csi->csi_flags = flags; + csi->csi_phase = phase; + csi->csi_leader_rank = leader; + csi->csi_api_flags = api_flags; + uuid_copy(csi->csi_iv_uuid, iv_uuid); + csi->csi_ranks.ca_count = rank_nr; + csi->csi_ranks.ca_arrays = ranks; + csi->csi_policies.ca_count = policy_nr; + csi->csi_policies.ca_arrays = policies; + csi->csi_uuids.ca_count = pool_nr; + csi->csi_uuids.ca_arrays = pools; + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + cso = crt_reply_get(req); + if (cso->cso_status < 0) + D_GOTO(out, rc = cso->cso_status); + + cb_args.cb_priv = args; + cb_args.cb_data = cso->cso_clues.ca_arrays; + cb_args.cb_nr = cso->cso_clues.ca_count; + rc = start_cb(&cb_args); + if (rc != 0) + goto out; + + if (cso->cso_cmp_ranks.ca_arrays == NULL) + return 0; + + cb_args.cb_data = NULL; /* unused data */ + cb_args.cb_nr = 0; /* unused nr */ + cb_args.cb_result = 1; + for (i = 0, cmp_rank = cso->cso_cmp_ranks.ca_arrays; i < cso->cso_cmp_ranks.ca_count; + i++, cmp_rank++) { + cb_args.cb_rank = *cmp_rank; + rc = start_cb(&cb_args); + if (rc != 0) + goto out; + } + +out: + if (req != NULL) { + /* + * co_post_reply will not be automatically called on the root node of the corpc. + * Let's trigger it explicitly to release related buffer. 
+ */ + chk_start_post_reply(req, NULL); + + if (rc < 0) { + rc1 = chk_stop_remote(rank_list, gen, pool_nr, pools, NULL, NULL); + if (rc1 < 0 && rc1 != -DER_NOTAPPLICABLE) + D_ERROR("Failed to cleanup DAOS check with gen "DF_X64": "DF_RC"\n", + gen, DP_RC(rc1)); + } + + crt_req_decref(req); + } + + D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO, + "Rank %u start checker, gen "DF_X64", flags %x, phase %d, iv "DF_UUIDF":"DF_RC"\n", + leader, gen, flags, phase, DP_UUID(iv_uuid), DP_RC(rc)); + + return rc; +} + +int +chk_stop_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t pools[], + chk_co_rpc_cb_t stop_cb, void *args) +{ + struct chk_co_rpc_cb_args cb_args = { 0 }; + crt_rpc_t *req = NULL; + struct chk_stop_in *csi = NULL; + struct chk_stop_out *cso = NULL; + d_rank_t *rank; + int rc; + int i; + + rc = chk_co_rpc_prepare(rank_list, CHK_STOP, &req, false); + if (rc != 0) + goto out; + + csi = crt_req_get(req); + csi->csi_gen = gen; + csi->csi_uuids.ca_count = pool_nr; + csi->csi_uuids.ca_arrays = pools; + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + cso = crt_reply_get(req); + if (cso->cso_status < 0) + D_GOTO(out, rc = cso->cso_status); + + if (stop_cb == NULL) + D_GOTO(out, rc = 0); + + if (cso->cso_ranks.ca_arrays == NULL) + D_GOTO(out, rc = 0); + + cb_args.cb_priv = args; + cb_args.cb_result = 1; + cb_args.cb_flags = cso->cso_flags; + for (i = 0, rank = cso->cso_ranks.ca_arrays; i < cso->cso_ranks.ca_count; i++, rank++) { + cb_args.cb_rank = *rank; + rc = stop_cb(&cb_args); + if (rc != 0) + goto out; + } + +out: + if (req != NULL) { + /* + * co_post_reply will not be automatically called on the root node of the corpc. + * Let's trigger it explicitly to release related buffer. 
+		 */
+		chk_stop_post_reply(req, NULL);
+		crt_req_decref(req);
+	}
+
+	D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO,
+		 "Rank %u stop DAOS check with gen "DF_X64", pool_nr %d: "DF_RC"\n",
+		 dss_self_rank(), gen, pool_nr, DP_RC(rc));
+
+	return rc;
+}
+/*
+ * Broadcast CHK_QUERY for @pools to @rank_list and hand the aggregated reply
+ * (status, instance status/phase and pool shards) to @query_cb. A negative reply
+ * status is returned without invoking the callback. The reply buffers and the RPC
+ * reference are released on every path that reaches "out".
+ */
+int
+chk_query_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t pools[],
+		 chk_co_rpc_cb_t query_cb, void *args)
+{
+	struct chk_co_rpc_cb_args	 cb_args = { 0 };
+	crt_rpc_t			*req = NULL;
+	struct chk_query_in		*cqi = NULL;
+	struct chk_query_out		*cqo = NULL;
+	int				 rc;
+
+	rc = chk_co_rpc_prepare(rank_list, CHK_QUERY, &req, true);
+	if (rc != 0)
+		goto out;
+
+	cqi = crt_req_get(req);
+	cqi->cqi_gen = gen;
+	cqi->cqi_uuids.ca_count = pool_nr;
+	cqi->cqi_uuids.ca_arrays = pools;
+
+	rc = dss_rpc_send(req);
+	if (rc != 0)
+		goto out;
+
+	cqo = crt_reply_get(req);
+	if (cqo->cqo_status < 0)
+		D_GOTO(out, rc = cqo->cqo_status);
+
+	cb_args.cb_priv = args;
+	cb_args.cb_gen = gen; /* NOTE(review): request gen, not the aggregated cqo_gen - confirm */
+	cb_args.cb_result = cqo->cqo_status;
+	cb_args.cb_ins_status = cqo->cqo_ins_status;
+	cb_args.cb_ins_phase = cqo->cqo_ins_phase;
+	cb_args.cb_data = cqo->cqo_shards.ca_arrays;
+	cb_args.cb_nr = cqo->cqo_shards.ca_count;
+	rc = query_cb(&cb_args);
+
+out:
+	if (req != NULL) {
+		/*
+		 * co_post_reply will not be automatically called on the root node of the corpc.
+		 * Let's trigger it explicitly to release related buffer.
+ */ + chk_query_post_reply(req, NULL); + crt_req_decref(req); + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Rank %u query DAOS check with gen "DF_X64", pool_nr %d: "DF_RC"\n", + dss_self_rank(), gen, pool_nr, DP_RC(rc)); + + return rc; +} + +int +chk_mark_remote(d_rank_list_t *rank_list, uint64_t gen, d_rank_t rank, uint32_t version) +{ + crt_rpc_t *req; + struct chk_mark_in *cmi; + struct chk_mark_out *cmo; + int rc; + + rc = chk_co_rpc_prepare(rank_list, CHK_MARK, &req, false); + if (rc != 0) + goto out; + + cmi = crt_req_get(req); + cmi->cmi_gen = gen; + cmi->cmi_rank = rank; + cmi->cmi_version = version; + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + cmo = crt_reply_get(req); + rc = cmo->cmo_status; + +out: + if (req != NULL) + crt_req_decref(req); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Mark rank %u as dead for DAOS check with gen "DF_X64": "DF_RC"\n", + rank, gen, DP_RC(rc)); + + return rc; +} + +int +chk_act_remote(d_rank_list_t *rank_list, uint64_t gen, uint64_t seq, uint32_t cla, + uint32_t act, d_rank_t rank, bool for_all) +{ + crt_rpc_t *req; + struct chk_act_in *cai; + struct chk_act_out *cao; + int rc; + + if (for_all) + rc = chk_co_rpc_prepare(rank_list, CHK_ACT, &req, false); + else + rc = chk_sg_rpc_prepare(rank, CHK_ACT, &req); + + if (rc != 0) + goto out; + + cai = crt_req_get(req); + cai->cai_gen = gen; + cai->cai_seq = seq; + cai->cai_cla = cla; + cai->cai_act = act; + cai->cai_flags = for_all ? 
CAF_FOR_ALL : 0; + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + cao = crt_reply_get(req); + rc = cao->cao_status; + +out: + if (req != NULL) + crt_req_decref(req); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Rank %u take action for DAOS check with gen "DF_X64", seq "DF_X64": "DF_RC"\n", + rank, gen, seq, DP_RC(rc)); + + return rc; +} + +int +chk_cont_list_remote(struct ds_pool *pool, uint64_t gen, chk_co_rpc_cb_t list_cb, void *args) +{ + struct chk_co_rpc_cb_args cb_args = { 0 }; + crt_rpc_t *req = NULL; + struct chk_cont_list_in *ccli = NULL; + struct chk_cont_list_out *cclo = NULL; + int rc; + + rc = ds_pool_bcast_create(dss_get_module_info()->dmi_ctx, pool, DAOS_CHK_MODULE, + CHK_CONT_LIST, DAOS_CHK_VERSION, &req, NULL, NULL, NULL); + if (rc != 0) { + D_ERROR("Failed to create RPC for check cont list for "DF_UUIDF": "DF_RC"\n", + DP_UUID(pool->sp_uuid), DP_RC(rc)); + D_GOTO(out, rc); + } + + ccli = crt_req_get(req); + ccli->ccli_gen = gen; + ccli->ccli_rank = dss_self_rank(); + uuid_copy(ccli->ccli_pool, pool->sp_uuid); + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + cclo = crt_reply_get(req); + if (cclo->cclo_status < 0) + D_GOTO(out, rc = cclo->cclo_status); + + cb_args.cb_priv = args; + cb_args.cb_result = cclo->cclo_status; + cb_args.cb_data = cclo->cclo_conts.ca_arrays; + cb_args.cb_nr = cclo->cclo_conts.ca_count; + rc = list_cb(&cb_args); + +out: + if (req != NULL) { + /* + * co_post_reply will not be automatically called on the root node of the corpc. + * Let's trigger it explicitly to release related buffer. 
+		 */
+		chk_cont_list_post_reply(req, NULL);
+		crt_req_decref(req);
+	}
+
+	D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
+		 "Rank %u DAOS check cont list for "DF_UUIDF" with gen "DF_X64": "DF_RC"\n",
+		 dss_self_rank(), DP_UUID(pool->sp_uuid), gen, DP_RC(rc));
+
+	return rc;
+}
+
+/*
+ * Broadcast CHK_POOL_START for pool @uuid to @rank_list with the given @phase and
+ * @flags. Returns the send error, or the aggregated reply status.
+ */
+int
+chk_pool_start_remote(d_rank_list_t *rank_list, uint64_t gen, uuid_t uuid, uint32_t phase,
+		      uint32_t flags)
+{
+	/* Must start as NULL: "out" tests it even when the RPC could not be created. */
+	crt_rpc_t			*req = NULL;
+	struct chk_pool_start_in	*cpsi;
+	struct chk_pool_start_out	*cpso;
+	int				 rc;
+
+	rc = chk_co_rpc_prepare(rank_list, CHK_POOL_START, &req, true);
+	if (rc != 0)
+		goto out;
+
+	cpsi = crt_req_get(req);
+	cpsi->cpsi_gen = gen;
+	uuid_copy(cpsi->cpsi_pool, uuid);
+	cpsi->cpsi_phase = phase;
+	cpsi->cpsi_flags = flags;
+
+	rc = dss_rpc_send(req);
+	if (rc != 0)
+		goto out;
+
+	cpso = crt_reply_get(req);
+	rc = cpso->cpso_status;
+
+out:
+	if (req != NULL)
+		crt_req_decref(req);
+
+	D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
+		 "Start pool ("DF_UUIDF") with gen "DF_X64": "DF_RC"\n",
+		 DP_UUID(uuid), gen, DP_RC(rc));
+
+	return rc;
+}
+
+/*
+ * Send CHK_POOL_MBS to @rank with the pool label, label sequence and the @mbs_nr
+ * member entries of @mbs_array. On a successful send, the reply status is returned
+ * and the reply's service hint is copied into @hint.
+ */
+int
+chk_pool_mbs_remote(d_rank_t rank, uint32_t phase, uint64_t gen, uuid_t uuid, char *label,
+		    uint64_t seq, uint32_t flags, uint32_t mbs_nr, struct chk_pool_mbs *mbs_array,
+		    struct rsvc_hint *hint)
+{
+	/* Must start as NULL: "out" tests it even when the RPC could not be created. */
+	crt_rpc_t		*req = NULL;
+	struct chk_pool_mbs_in	*cpmi;
+	struct chk_pool_mbs_out	*cpmo;
+	int			 rc;
+
+	rc = chk_sg_rpc_prepare(rank, CHK_POOL_MBS, &req);
+	if (rc != 0)
+		goto out;
+
+	cpmi = crt_req_get(req);
+	cpmi->cpmi_gen = gen;
+	uuid_copy(cpmi->cpmi_pool, uuid);
+	cpmi->cpmi_flags = flags;
+	cpmi->cpmi_phase = phase;
+	cpmi->cpmi_label = label;
+	cpmi->cpmi_label_seq = seq;
+	cpmi->cpmi_targets.ca_count = mbs_nr;
+	cpmi->cpmi_targets.ca_arrays = mbs_array;
+
+	rc = dss_rpc_send(req);
+	if (rc != 0)
+		goto out;
+
+	cpmo = crt_reply_get(req);
+	rc = cpmo->cpmo_status;
+	*hint = cpmo->cpmo_hint;
+
+out:
+	if (req != NULL)
+		crt_req_decref(req);
+
+	D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
+		 "Sent pool ("DF_UUIDF") members and label %s ("
+		 DF_X64") to rank %u with phase %d gen "DF_X64": "DF_RC"\n",
+		 DP_UUID(uuid), label != NULL ? label : "(null)", seq, rank, phase, gen, DP_RC(rc));
+
+	return rc;
+}
+
+/*
+ * Send CHK_REPORT to @leader describing one inconsistency found by @rank/@target.
+ * Optional identifiers (@pool, @cont, @obj, @dkey, @akey) are zeroed in the request
+ * when passed as NULL. Returns the send error, or the reply status.
+ */
+int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, int result,
+		      d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, uuid_t *cont,
+		      char *cont_label, daos_unit_oid_t *obj, daos_key_t *dkey, daos_key_t *akey,
+		      char *msg, uint32_t option_nr, uint32_t *options, uint32_t detail_nr,
+		      d_sg_list_t *details, uint64_t seq)
+{
+	/* Must start as NULL: "out" tests it even when the RPC could not be created. */
+	crt_rpc_t		*req = NULL;
+	struct chk_report_in	*cri;
+	struct chk_report_out	*cro;
+	int			 rc;
+
+	rc = chk_sg_rpc_prepare(leader, CHK_REPORT, &req);
+	if (rc != 0)
+		goto out;
+
+	cri = crt_req_get(req);
+	cri->cri_gen = gen;
+	cri->cri_ics_class = cla;
+	cri->cri_ics_action = act;
+	cri->cri_ics_result = result;
+	cri->cri_rank = rank;
+	cri->cri_target = target;
+	cri->cri_seq = seq;
+
+	if (pool != NULL)
+		uuid_copy(cri->cri_pool, *pool);
+	else
+		memset(cri->cri_pool, 0, sizeof(uuid_t));
+
+	cri->cri_pool_label = pool_label;
+
+	if (cont != NULL)
+		uuid_copy(cri->cri_cont, *cont);
+	else
+		memset(cri->cri_cont, 0, sizeof(uuid_t));
+
+	cri->cri_cont_label = cont_label;
+
+	if (obj != NULL)
+		cri->cri_obj = *obj;
+	else
+		memset(&cri->cri_obj, 0, sizeof(cri->cri_obj));
+
+	if (dkey != NULL)
+		cri->cri_dkey = *dkey;
+	else
+		memset(&cri->cri_dkey, 0, sizeof(cri->cri_dkey));
+
+	if (akey != NULL)
+		cri->cri_akey = *akey;
+	else
+		memset(&cri->cri_akey, 0, sizeof(cri->cri_akey));
+
+	cri->cri_msg = msg;
+	cri->cri_options.ca_count = option_nr;
+	cri->cri_options.ca_arrays = options;
+	cri->cri_details.ca_count = detail_nr;
+	cri->cri_details.ca_arrays = details;
+
+	rc = dss_rpc_send(req);
+	if (rc != 0)
+		goto out;
+
+	cro = crt_reply_get(req);
+	rc = cro->cro_status;
+
+out:
+	if (req != NULL)
+		crt_req_decref(req);
+
+	D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
+		 "Rank %u report DAOS check to leader %u, gen "DF_X64", class %u, action %u, "
+		 "result %d, "DF_UUIDF"/"DF_UUIDF", seq "DF_X64": "DF_RC"\n", rank, leader,
+		 gen, cla, act, result, DP_UUID(pool), DP_UUID(cont), seq, DP_RC(rc));
+
+	return rc;
+}
+
+/*
+ * Send CHK_REJOIN from @rank to @leader. When the reply status is 0 and it carries
+ * at least one pool, @flags receives the reply flags and @pools/@pool_nr receive a
+ * newly allocated copy of the pool UUID array; otherwise the outputs are untouched.
+ * The copy is allocated here and is not freed by this function.
+ *
+ * Returns the send error, the reply status, or -DER_NOMEM if the copy cannot be
+ * allocated.
+ */
+int
+chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags,
+		  uint32_t *pool_nr, uuid_t **pools)
+{
+	/* Must start as NULL: "out" tests it even when the RPC could not be created. */
+	crt_rpc_t		*req = NULL;
+	struct chk_rejoin_in	*cri;
+	struct chk_rejoin_out	*cro;
+	uuid_t			*tmp;
+	int			 rc;
+
+	rc = chk_sg_rpc_prepare(leader, CHK_REJOIN, &req);
+	if (rc != 0)
+		goto out;
+
+	cri = crt_req_get(req);
+	cri->cri_gen = gen;
+	cri->cri_rank = rank;
+	uuid_copy(cri->cri_iv_uuid, iv_uuid);
+
+	rc = dss_rpc_send(req);
+	if (rc != 0)
+		goto out;
+
+	cro = crt_reply_get(req);
+	rc = cro->cro_status;
+	if (rc == 0 && cro->cro_pools.ca_count > 0) {
+		*flags = cro->cro_flags;
+		/*
+		 * Allocate ca_count elements, not ca_count bytes: the memcpy() below
+		 * copies sizeof(*tmp) * ca_count bytes into the buffer.
+		 */
+		D_ALLOC_ARRAY(tmp, cro->cro_pools.ca_count);
+		if (tmp == NULL)
+			D_GOTO(out, rc = -DER_NOMEM);
+
+		memcpy(tmp, cro->cro_pools.ca_arrays, sizeof(*tmp) * cro->cro_pools.ca_count);
+		*pool_nr = cro->cro_pools.ca_count;
+		*pools = tmp;
+	}
+
+out:
+	if (req != NULL)
+		crt_req_decref(req);
+
+	D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
+		 "Rank %u rejoin DAOS check with leader %u, gen "DF_X64", iv "DF_UUIDF": "DF_RC"\n",
+		 rank, leader, gen, DP_UUID(iv_uuid), DP_RC(rc));
+
+	return rc;
+}
+
+/* Proc (encode/decode/free) routine for struct chk_policy: class, then action. */
+static int
+crt_proc_struct_chk_policy(crt_proc_t proc, crt_proc_op_t proc_op, struct chk_policy *policy)
+{
+	int	rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &policy->cp_class);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_uint32_t(proc, proc_op, &policy->cp_action);
+}
+
+/*
+ * Proc routine for struct chk_time: two 64-bit fields, start time then stop time.
+ * The second field must not be ct_start_time again, otherwise the stop time never
+ * goes on the wire.
+ */
+static int
+crt_proc_struct_chk_time(crt_proc_t proc, crt_proc_op_t proc_op, struct chk_time *time)
+{
+	int	rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &time->ct_start_time);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_uint64_t(proc, proc_op, &time->ct_stop_time);
+}
+
+/* Proc routine for struct chk_statistics: total, repaired, ignored, failed. */
+static int
+crt_proc_struct_chk_statistics(crt_proc_t proc, crt_proc_op_t proc_op, struct chk_statistics *cs)
+{
+	int	rc;
+
+	rc = crt_proc_uint64_t(proc,
proc_op, &cs->cs_total);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &cs->cs_repaired);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &cs->cs_ignored);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_uint64_t(proc, proc_op, &cs->cs_failed);
+}
+/*
+ * Proc routine for struct chk_query_target. Fields are processed in this fixed
+ * order: rank, target index, instance status, padding, statistics, time.
+ */
+static int
+crt_proc_struct_chk_query_target(crt_proc_t proc, crt_proc_op_t proc_op,
+				 struct chk_query_target *target)
+{
+	int	rc;
+
+	rc = crt_proc_d_rank_t(proc, proc_op, &target->cqt_rank);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &target->cqt_tgt);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &target->cqt_ins_status);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &target->cqt_padding);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_struct_chk_statistics(proc, proc_op, &target->cqt_statistics);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_struct_chk_time(proc, proc_op, &target->cqt_time);
+}
+/*
+ * Proc routine for struct chk_query_pool_shard: the fixed fields first, then
+ * cqps_target_nr entries of struct chk_query_target. The target array is allocated
+ * when decoding, freed when freeing, and also freed if decoding an entry fails.
+ */
+static int
+crt_proc_struct_chk_query_pool_shard(crt_proc_t proc, crt_proc_op_t proc_op,
+				     struct chk_query_pool_shard *shard)
+{
+	int	rc;
+	int	i;
+
+	rc = crt_proc_uuid_t(proc, proc_op, &shard->cqps_uuid);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &shard->cqps_status);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &shard->cqps_phase);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_struct_chk_statistics(proc, proc_op, &shard->cqps_statistics);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_struct_chk_time(proc, proc_op, &shard->cqps_time);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &shard->cqps_rank);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &shard->cqps_target_nr);
+	if (unlikely(rc != 0))
+		return rc;
+
+	if (shard->cqps_target_nr == 0)
+		return 0;
+
+	if (FREEING(proc_op)) {
+		D_FREE(shard->cqps_targets);
+		return 0;
+	}
+
+	if (DECODING(proc_op)) {
+		D_ALLOC_ARRAY(shard->cqps_targets, shard->cqps_target_nr);
+		if (shard->cqps_targets == NULL)
+			return -DER_NOMEM;
+	}
+
+	for (i = 0; i < shard->cqps_target_nr; i++) {
+		rc = crt_proc_struct_chk_query_target(proc, proc_op, &shard->cqps_targets[i]);
+		if (unlikely(rc != 0)) {
+			if (DECODING(proc_op))
+				D_FREE(shard->cqps_targets);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+/* Proc routine for struct rdb_clue; fields are processed in the order listed below. */
+static int
+crp_proc_struct_rdb_clue(crt_proc_t proc, crt_proc_op_t proc_op, struct rdb_clue *rdb)
+{
+	int	rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &rdb->bcl_term);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_int32_t(proc, proc_op, &rdb->bcl_vote);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_d_rank_t(proc, proc_op, &rdb->bcl_self);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &rdb->bcl_last_index);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &rdb->bcl_last_term);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &rdb->bcl_base_index);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint64_t(proc, proc_op, &rdb->bcl_base_term);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_d_rank_list_t(proc, proc_op, &rdb->bcl_replicas);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_uint64_t(proc, proc_op, &rdb->bcl_oid_next);
+}
+/* Proc routine for struct ds_pool_svc_clue: the rdb clue, then the pool map version. */
+static int
+crt_proc_struct_ds_pool_svc_clue(crt_proc_t proc, crt_proc_op_t proc_op,
+				 struct ds_pool_svc_clue *psc)
+{
+	int	rc;
+
+	rc = crp_proc_struct_rdb_clue(proc, proc_op, &psc->psc_db_clue);
+	if (unlikely(rc != 0))
+		return rc;
+
+	return crt_proc_uint32_t(proc, proc_op, &psc->psc_map_version);
+}
+/*
+ * Proc routine for struct ds_pool_clue. The fixed fields come first; they decide
+ * which optional parts follow: the service clue when pc_rc is positive, the label
+ * when pc_label_len is non-zero, and pc_tgt_nr target status words. Optional parts
+ * are allocated while decoding. ds_pool_clue_fini() is called when freeing, and
+ * when decoding fails part-way.
+ */
+static int
+crt_proc_struct_ds_pool_clue(crt_proc_t proc, crt_proc_op_t proc_op, struct ds_pool_clue *clue)
+{
+	int	rc;
+	int	i;
+
+	rc = crt_proc_uuid_t(proc, proc_op, &clue->pc_uuid);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_d_rank_t(proc, proc_op, &clue->pc_rank);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &clue->pc_dir);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_int32_t(proc, proc_op, &clue->pc_rc);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_int32_t(proc, proc_op, &clue->pc_tgt_nr);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &clue->pc_label_len);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &clue->pc_phase);
+	if (unlikely(rc != 0))
+		return rc;
+
+	if (FREEING(proc_op))
+		goto out;
+
+	if (clue->pc_rc > 0) {
+		if (DECODING(proc_op)) {
+			D_ALLOC_PTR(clue->pc_svc_clue);
+			if (clue->pc_svc_clue == NULL)
+				D_GOTO(out, rc = -DER_NOMEM);
+		}
+
+		rc = crt_proc_struct_ds_pool_svc_clue(proc, proc_op, clue->pc_svc_clue);
+		if (unlikely(rc != 0))
+			goto out;
+	}
+
+	if (clue->pc_label_len > 0) {
+		if (DECODING(proc_op)) {
+			/* one byte more than the label bytes that are copied below */
+			D_ALLOC(clue->pc_label, clue->pc_label_len + 1);
+			if (clue->pc_label == NULL)
+				D_GOTO(out, rc = -DER_NOMEM);
+		}
+
+		rc = crt_proc_memcpy(proc, proc_op, clue->pc_label, clue->pc_label_len);
+		if (unlikely(rc != 0))
+			goto out;
+	}
+
+	if (clue->pc_tgt_nr > 0) {
+		if (DECODING(proc_op)) {
+			D_ALLOC_ARRAY(clue->pc_tgt_status, clue->pc_tgt_nr);
+			if (clue->pc_tgt_status == NULL)
+				D_GOTO(out, rc = -DER_NOMEM);
+		}
+
+		for (i = 0; i < clue->pc_tgt_nr; i++) {
+			rc = crt_proc_uint32_t(proc, proc_op, &clue->pc_tgt_status[i]);
+			if (unlikely(rc != 0))
+				goto out;
+		}
+	}
+
+out:
+	if (unlikely(rc != 0 && DECODING(proc_op)) || FREEING(proc_op))
+		ds_pool_clue_fini(clue);
+
+	return rc;
+}
+/*
+ * Proc routine for struct chk_pool_mbs: rank, target count, then cpm_tgt_nr target
+ * status words. The status array is allocated while decoding and freed when
+ * freeing or when decoding fails.
+ */
+static int
+crt_proc_struct_chk_pool_mbs(crt_proc_t proc, crt_proc_op_t proc_op, struct chk_pool_mbs *mbs)
+{
+	int	rc;
+	int	i;
+
+	rc = crt_proc_d_rank_t(proc, proc_op, &mbs->cpm_rank);
+	if (unlikely(rc != 0))
+		return rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &mbs->cpm_tgt_nr);
+	if (unlikely(rc != 0))
+		return rc;
+
+	if (FREEING(proc_op))
+		goto out;
+
+	if (mbs->cpm_tgt_nr > 0) {
+		if (DECODING(proc_op)) {
+			D_ALLOC_ARRAY(mbs->cpm_tgt_status, mbs->cpm_tgt_nr);
+			if (mbs->cpm_tgt_status == NULL)
+				D_GOTO(out, rc = -DER_NOMEM);
+		}
+
+		for (i = 0; i < mbs->cpm_tgt_nr; i++) {
+			rc = crt_proc_uint32_t(proc, proc_op, &mbs->cpm_tgt_status[i]);
+			if (unlikely(rc != 0))
+				goto out;
+		}
+	}
+
+out:
+	if (unlikely(rc != 0 && DECODING(proc_op)) || FREEING(proc_op))
+		D_FREE(mbs->cpm_tgt_status);
+
+	return rc;
+}
+/*
+ * Proc routine for struct rsvc_hint: flags, rank, term. A failure on the first two
+ * fields is reported as -DER_HG rather than the underlying error code.
+ */
+static int
+crt_proc_struct_rsvc_hint(crt_proc_t proc, crt_proc_op_t proc_op,
+			  struct rsvc_hint *hint)
+{
+	int	rc;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &hint->sh_flags);
+	if (rc != 0)
+		return -DER_HG;
+
+	rc = crt_proc_uint32_t(proc, proc_op, &hint->sh_rank);
+	if (rc != 0)
+		return -DER_HG;
+
+	return crt_proc_uint64_t(proc, proc_op, &hint->sh_term);
+}
+/* Instantiate the RPC definitions from the input/output field sequences. */
+CRT_RPC_DEFINE(chk_start, DAOS_ISEQ_CHK_START, DAOS_OSEQ_CHK_START);
+CRT_RPC_DEFINE(chk_stop, DAOS_ISEQ_CHK_STOP, DAOS_OSEQ_CHK_STOP);
+CRT_RPC_DEFINE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY);
+CRT_RPC_DEFINE(chk_mark, DAOS_ISEQ_CHK_MARK, DAOS_OSEQ_CHK_MARK);
+CRT_RPC_DEFINE(chk_act, DAOS_ISEQ_CHK_ACT, DAOS_OSEQ_CHK_ACT);
+CRT_RPC_DEFINE(chk_cont_list, DAOS_ISEQ_CHK_CONT_LIST, DAOS_OSEQ_CHK_CONT_LIST);
+CRT_RPC_DEFINE(chk_pool_start, DAOS_ISEQ_CHK_POOL_START, DAOS_OSEQ_CHK_POOL_START);
+CRT_RPC_DEFINE(chk_pool_mbs, DAOS_ISEQ_CHK_POOL_MBS, DAOS_OSEQ_CHK_POOL_MBS);
+CRT_RPC_DEFINE(chk_report, DAOS_ISEQ_CHK_REPORT, DAOS_OSEQ_CHK_REPORT);
+CRT_RPC_DEFINE(chk_rejoin, DAOS_ISEQ_CHK_REJOIN, DAOS_OSEQ_CHK_REJOIN);
diff --git a/src/chk/chk_srv.c b/src/chk/chk_srv.c
new file mode 100644
index 00000000000..8f9b7f45e72
--- /dev/null
+++ b/src/chk/chk_srv.c
@@ -0,0 +1,379 @@
+/**
+ * (C) Copyright 2022-2023 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#define D_LOGFAC	DD_FAC(chk)
+
+#include
+#include
+#include
+#include
+#include
+
+#include "chk_internal.h"
+/*
+ * RPC handler for check start: run chk_engine_start() with the request arguments
+ * and send the reply. A positive engine result makes the reply's cmp_ranks array
+ * carry this engine's own rank; if that one-element array cannot be allocated the
+ * status becomes -DER_NOMEM. The clues filled in by the engine are attached to the
+ * reply, and the capacity fields are set to the element counts.
+ */
+static void
+ds_chk_start_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_start_in	*csi = crt_req_get(rpc);
+	struct chk_start_out	*cso = crt_reply_get(rpc);
+	struct ds_pool_clues	 clues = { 0 };
+	d_rank_t		*rank;
+	int			 rc;
+
+	rc = chk_engine_start(csi->csi_gen, csi->csi_ranks.ca_count, csi->csi_ranks.ca_arrays,
+			      csi->csi_policies.ca_count, csi->csi_policies.ca_arrays,
+			      csi->csi_uuids.ca_count, csi->csi_uuids.ca_arrays, csi->csi_api_flags,
+			      csi->csi_phase, csi->csi_leader_rank, csi->csi_flags,
+			      csi->csi_iv_uuid, &clues);
+	if (rc > 0) {
+		D_ALLOC_PTR(rank);
+		if (rank == NULL) {
+			rc = -DER_NOMEM;
+			cso->cso_cmp_ranks.ca_count = 0;
+			cso->cso_cmp_ranks.ca_arrays = NULL;
+		} else {
+			*rank = dss_self_rank();
+			cso->cso_cmp_ranks.ca_count = 1;
+			cso->cso_cmp_ranks.ca_arrays = rank;
+		}
+	} else {
+		cso->cso_cmp_ranks.ca_count = 0;
+		cso->cso_cmp_ranks.ca_arrays = NULL;
+	}
+
+	cso->cso_status = rc;
+	cso->cso_clues.ca_count = clues.pcs_len;
+	cso->cso_clues.ca_arrays = clues.pcs_array;
+	cso->cso_rank_cap = cso->cso_cmp_ranks.ca_count;
+	cso->cso_clue_cap = cso->cso_clues.ca_count;
+
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check start: "DF_RC"\n", DP_RC(rc));
+
+	/* @clues will be freed via chk_start_post_reply. Do not free it here. */
+}
+/*
+ * RPC handler for check stop: run chk_engine_stop() and send the reply. A positive
+ * engine result makes the reply's rank array carry this engine's own rank; if that
+ * one-element array cannot be allocated the status becomes -DER_NOMEM.
+ */
+static void
+ds_chk_stop_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_stop_in	*csi = crt_req_get(rpc);
+	struct chk_stop_out	*cso = crt_reply_get(rpc);
+	d_rank_t		*rank;
+	int			 rc;
+
+	rc = chk_engine_stop(csi->csi_gen, csi->csi_uuids.ca_count, csi->csi_uuids.ca_arrays,
+			     &cso->cso_flags);
+	if (rc > 0) {
+		D_ALLOC_PTR(rank);
+		if (rank == NULL) {
+			rc = -DER_NOMEM;
+			cso->cso_ranks.ca_count = 0;
+			cso->cso_ranks.ca_arrays = NULL;
+		} else {
+			*rank = dss_self_rank();
+			cso->cso_ranks.ca_count = 1;
+			cso->cso_ranks.ca_arrays = rank;
+		}
+	} else {
+		cso->cso_ranks.ca_count = 0;
+		cso->cso_ranks.ca_arrays = NULL;
+	}
+
+
+	cso->cso_status = rc;
+	cso->cso_cap = cso->cso_ranks.ca_count;
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check stop: "DF_RC"\n", DP_RC(rc));
+}
+/*
+ * RPC handler for check query: run chk_engine_query() and send the reply. On
+ * success the shards returned by the engine are attached to the reply; on failure
+ * the reply carries the error and an empty shard array.
+ */
+static void
+ds_chk_query_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_query_in		*cqi = crt_req_get(rpc);
+	struct chk_query_out		*cqo = crt_reply_get(rpc);
+	struct chk_query_pool_shard	*shards = NULL;
+	uint32_t			 shard_nr = 0;
+	int				 rc;
+
+	rc = chk_engine_query(cqi->cqi_gen, cqi->cqi_uuids.ca_count, cqi->cqi_uuids.ca_arrays,
+			      &cqo->cqo_ins_status, &cqo->cqo_ins_phase, &shard_nr, &shards,
+			      &cqo->cqo_gen);
+	if (rc != 0) {
+		cqo->cqo_status = rc;
+		cqo->cqo_cap = 0;
+		cqo->cqo_shards.ca_count = 0;
+		cqo->cqo_shards.ca_arrays = NULL;
+	} else {
+		cqo->cqo_status = 0;
+		cqo->cqo_cap = shard_nr;
+		cqo->cqo_shards.ca_count = shard_nr;
+		cqo->cqo_shards.ca_arrays = shards;
+	}
+
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check query: "DF_RC"\n", DP_RC(rc));
+
+	/* @shards will be freed via chk_query_post_reply. Do not free it here. */
+}
+/* RPC handler for check mark: reply with the result of chk_engine_mark_rank_dead(). */
+static void
+ds_chk_mark_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_mark_in	*cmi = crt_req_get(rpc);
+	struct chk_mark_out	*cmo = crt_reply_get(rpc);
+	int			 rc;
+
+	rc = chk_engine_mark_rank_dead(cmi->cmi_gen, cmi->cmi_rank, cmi->cmi_version);
+
+	cmo->cmo_status = rc;
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check mark rank dead: "DF_RC"\n", DP_RC(rc));
+}
+/* RPC handler for check act: reply with the result of chk_engine_act(). */
+static void
+ds_chk_act_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_act_in	*cai = crt_req_get(rpc);
+	struct chk_act_out	*cao = crt_reply_get(rpc);
+	int			 rc;
+
+	rc = chk_engine_act(cai->cai_gen, cai->cai_seq, cai->cai_cla, cai->cai_act, cai->cai_flags);
+
+	cao->cao_status = rc;
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check act: "DF_RC"\n", DP_RC(rc));
+}
+
+static void
+ds_chk_cont_list_hdlr(crt_rpc_t *rpc)
+{
+	struct chk_cont_list_in		*ccli = crt_req_get(rpc);
+	struct chk_cont_list_out	*cclo = crt_reply_get(rpc);
+	uuid_t				*conts = NULL;
+	uint32_t			 count = 0;
+	int				 rc = 0;
+
+	rc = chk_engine_cont_list(ccli->ccli_gen, ccli->ccli_pool, &conts, &count);
+
+	cclo->cclo_status = rc;
+	cclo->cclo_cap = count;
+	cclo->cclo_conts.ca_arrays = conts;
+	cclo->cclo_conts.ca_count = count;
+
+	rc = crt_reply_send(rpc);
+	if (rc != 0)
+		D_ERROR("Failed to reply check cont list: "DF_RC"\n", DP_RC(rc));
+
+	/* @conts will be freed via chk_cont_list_post_reply. Do not free it here.
*/ +} + +static void +ds_chk_pool_start_hdlr(crt_rpc_t *rpc) +{ + struct chk_pool_start_in *cpsi = crt_req_get(rpc); + struct chk_pool_start_out *cpso = crt_reply_get(rpc); + int rc; + + rc = chk_engine_pool_start(cpsi->cpsi_gen, cpsi->cpsi_pool, cpsi->cpsi_phase, + cpsi->cpsi_flags); + + cpso->cpso_status = rc; + cpso->cpso_rank = dss_self_rank(); + rc = crt_reply_send(rpc); + if (rc != 0) + D_ERROR("Failed to reply check pool start: "DF_RC"\n", DP_RC(rc)); +} + +static void +ds_chk_pool_mbs_hdlr(crt_rpc_t *rpc) +{ + struct chk_pool_mbs_in *cpmi = crt_req_get(rpc); + struct chk_pool_mbs_out *cpmo = crt_reply_get(rpc); + int rc; + + rc = chk_engine_pool_mbs(cpmi->cpmi_gen, cpmi->cpmi_pool, cpmi->cpmi_phase, + cpmi->cpmi_label, cpmi->cpmi_label_seq, cpmi->cpmi_flags, + cpmi->cpmi_targets.ca_count, cpmi->cpmi_targets.ca_arrays, + &cpmo->cpmo_hint); + + cpmo->cpmo_status = rc; + rc = crt_reply_send(rpc); + if (rc != 0) + D_ERROR("Failed to reply check pool mbs: "DF_RC"\n", DP_RC(rc)); +} + +static void +ds_chk_report_hdlr(crt_rpc_t *rpc) +{ + struct chk_report_in *cri = crt_req_get(rpc); + struct chk_report_out *cro = crt_reply_get(rpc); + struct chk_report_unit cru; + int rc; + + cru.cru_gen = cri->cri_gen; + cru.cru_cla = cri->cri_ics_class; + cru.cru_act = cri->cri_ics_action; + cru.cru_target = cri->cri_target; + cru.cru_rank = cri->cri_rank; + cru.cru_option_nr = cri->cri_options.ca_count; + cru.cru_detail_nr = cri->cri_details.ca_count; + cru.cru_pool = &cri->cri_pool; + cru.cru_pool_label = cri->cri_pool_label; + cru.cru_cont = &cri->cri_cont; + cru.cru_cont_label = cri->cri_cont_label; + cru.cru_obj = &cri->cri_obj; + cru.cru_dkey = &cri->cri_dkey; + cru.cru_akey = &cri->cri_akey; + cru.cru_msg = cri->cri_msg; + cru.cru_options = cri->cri_options.ca_arrays; + cru.cru_details = cri->cri_details.ca_arrays; + cru.cru_result = cri->cri_ics_result; + + rc = chk_leader_report(&cru, &cri->cri_seq, NULL); + + cro->cro_status = rc; + rc = crt_reply_send(rpc); + if (rc 
!= 0) + D_ERROR("Failed to reply check report: "DF_RC"\n", DP_RC(rc)); +} + +static void +ds_chk_rejoin_hdlr(crt_rpc_t *rpc) +{ + struct chk_rejoin_in *cri = crt_req_get(rpc); + struct chk_rejoin_out *cro = crt_reply_get(rpc); + uuid_t *pools = NULL; + int pool_nr = 0; + int rc; + + rc = chk_leader_rejoin(cri->cri_gen, cri->cri_rank, cri->cri_iv_uuid, &cro->cro_flags, + &pool_nr, &pools); + + cro->cro_status = rc; + if (rc == 0) { + cro->cro_pools.ca_count = pool_nr; + cro->cro_pools.ca_arrays = pools; + } + + rc = crt_reply_send(rpc); + if (rc != 0) + D_ERROR("Failed to reply check rejoin: "DF_RC"\n", DP_RC(rc)); + + D_FREE(pools); +} + +static int +ds_chk_init(void) +{ + int rc; + + rc = dbtree_class_register(DBTREE_CLASS_CHK_POOL, 0, &chk_pool_ops); + if (rc != 0) + goto out; + + rc = dbtree_class_register(DBTREE_CLASS_CHK_RANK, 0, &chk_rank_ops); + if (rc != 0) + goto out; + + rc = dbtree_class_register(DBTREE_CLASS_CHK_PA, 0, &chk_pending_ops); + if (rc != 0) + goto out; + + rc = dbtree_class_register(DBTREE_CLASS_CHK_CONT, 0, &chk_cont_ops); + if (rc != 0) + goto out; + + rc = chk_iv_init(); + +out: + return rc; +} + +static int +ds_chk_fini(void) +{ + return chk_iv_fini(); +} + +static int +ds_chk_setup(void) +{ + int rc; + + /* Do NOT move chk_vos_init into ds_chk_init, because sys_db is not ready at that time. */ + chk_vos_init(); + + rc = chk_leader_init(); + if (rc != 0) + goto out_vos; + + rc = chk_engine_init(); + if (rc != 0) + goto out_leader; + + /* + * Currently, we do NOT support leader to rejoin the former check instance. Because we do + * not support leader switch, during current leader down time, the reported inconsistency + * and related repair result are lost. Under such case, the admin has to stop and restart + * the check explicitly. 
+ */ + + rc = dss_ult_create(chk_engine_rejoin, NULL, DSS_XS_SYS, 0, 0, NULL); + D_ASSERT(rc == 0); + + goto out_done; + +out_leader: + chk_leader_fini(); +out_vos: + chk_vos_fini(); +out_done: + return rc; +} + +static int +ds_chk_cleanup(void) +{ + chk_engine_pause(); + chk_leader_pause(); + chk_engine_fini(); + chk_leader_fini(); + chk_vos_fini(); + + return 0; +} + +#define X(a, b, c, d, e) \ +{ \ + .dr_opc = a, \ + .dr_hdlr = d, \ + .dr_corpc_ops = e, \ +} + +static struct daos_rpc_handler chk_handlers[] = { + CHK_PROTO_SRV_RPC_LIST, +}; + +#undef X + +struct dss_module chk_module = { + .sm_name = "chk", + .sm_mod_id = DAOS_CHK_MODULE, + .sm_ver = DAOS_CHK_VERSION, + .sm_init = ds_chk_init, + .sm_fini = ds_chk_fini, + .sm_setup = ds_chk_setup, + .sm_cleanup = ds_chk_cleanup, + .sm_proto_count = 1, + .sm_proto_fmt = {&chk_proto_fmt}, + .sm_cli_count = {0}, + .sm_handlers = {chk_handlers}, +}; diff --git a/src/chk/chk_upcall.c b/src/chk/chk_upcall.c new file mode 100644 index 00000000000..893b7d1ec32 --- /dev/null +++ b/src/chk/chk_upcall.c @@ -0,0 +1,174 @@ +/* + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include +#include +#include +#include + +#include "chk.pb-c.h" +#include "chk_internal.h" + +#define CHK_ACTION_MAX CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA + +static void +chk_sg_free(char **buf, int cnt) +{ + int i; + + if (buf != NULL) { + for (i = 0; i < cnt; i++) + D_FREE(buf[i]); + D_FREE(buf); + } +} + +static int +chk_sg_list2string_array(d_sg_list_t *sgls, uint32_t sgl_nr, char ***array) +{ + char **buf = NULL; + int cnt = 0; + int rc = 0; + int i; + int j; + int k; + + for (i = 0; i < sgl_nr; i++) + cnt += sgls[i].sg_nr; + + if (unlikely(cnt == 0)) + goto out; + + D_ALLOC_ARRAY(buf, cnt); + if (buf == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + /* QUEST: How to transfer all the data into d_sg_list_t array? Some may be not string. 
*/ + + for (i = 0, k = 0; i < sgl_nr; i++) { + for (j = 0; j < sgls[i].sg_nr; j++, k++) { + rc = chk_dup_string(&buf[k], sgls[i].sg_iovs[j].iov_buf, + sgls[i].sg_iovs[j].iov_len); + if (rc != 0) + goto out; + } + } + +out: + if (rc == 0) { + *array = buf; + } else { + chk_sg_free(buf, cnt); + cnt = rc; + } + + return cnt; +} + +int +chk_report_upcall(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, int result, + d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, + uuid_t *cont, char *cont_label, daos_unit_oid_t *obj, + daos_key_t *dkey, daos_key_t *akey, char *msg, uint32_t option_nr, + uint32_t *options, uint32_t detail_nr, d_sg_list_t *details) +{ + Chk__CheckReport report = CHK__CHECK_REPORT__INIT; + time_t tm = time(NULL); + int rc; + + report.seq = seq; + report.class_ = cla; + report.action = act; + report.result = result; + report.rank = rank; + report.target = target; + + if (pool != NULL && !uuid_is_null(*pool)) { + D_ASPRINTF(report.pool_uuid, DF_UUIDF, DP_UUID(*pool)); + if (report.pool_uuid == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + report.pool_uuid = NULL; + } + + report.pool_label = pool_label; + + if (cont != NULL && !uuid_is_null(*cont)) { + D_ASPRINTF(report.cont_uuid, DF_UUIDF, DP_UUID(*cont)); + if (report.cont_uuid == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + report.cont_uuid = NULL; + } + + report.cont_label = cont_label; + + if (obj != NULL && !daos_unit_oid_is_null(*obj)) { + D_ASPRINTF(report.objid, DF_UOID, DP_UOID(*obj)); + if (report.objid == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + report.objid = NULL; + } + + if (!daos_iov_empty(dkey)) { + D_ASPRINTF(report.dkey, DF_KEY, DP_KEY(dkey)); + if (report.dkey == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + report.dkey = NULL; + } + + if (!daos_iov_empty(akey)) { + D_ASPRINTF(report.akey, DF_KEY, DP_KEY(akey)); + if (report.akey == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + report.akey = NULL; + } + + D_ASPRINTF(report.timestamp, 
"%s", ctime(&tm)); + if (report.timestamp == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + report.msg = msg; + report.n_act_choices = option_nr; + report.act_choices = option_nr != 0 ? options : NULL; + + if (detail_nr != 0) { + D_ASSERT(details != NULL); + + rc = chk_sg_list2string_array(details, detail_nr, &report.act_details); + if (rc < 0) + goto out; + + report.n_act_details = rc; + } else { + report.n_act_details = 0; + report.act_details = NULL; + } + + rc = ds_chk_report_upcall(&report); + +out: + D_FREE(report.pool_uuid); + D_FREE(report.cont_uuid); + D_FREE(report.objid); + D_FREE(report.dkey); + D_FREE(report.akey); + D_FREE(report.timestamp); + chk_sg_free(report.act_details, report.n_act_details); + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Check leader upcall for instance "DF_X64" for seq "DF_X64": "DF_RC"\n", + gen, seq, DP_RC(rc)); + + return rc; +} diff --git a/src/chk/chk_vos.c b/src/chk/chk_vos.c new file mode 100644 index 00000000000..fdefc2995f7 --- /dev/null +++ b/src/chk/chk_vos.c @@ -0,0 +1,307 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#define D_LOGFAC DD_FAC(chk) + +#include +#include +#include + +#include "chk_internal.h" + +static struct sys_db *chk_db; + +static int +chk_db_fetch(char *key, int key_size, void *val, int val_size) +{ + d_iov_t key_iov; + d_iov_t val_iov; + + d_iov_set(&key_iov, key, key_size); + d_iov_set(&val_iov, val, val_size); + + return chk_db->sd_fetch(chk_db, CHK_DB_TABLE, &key_iov, &val_iov); +} + +static int +chk_db_update(char *key, int key_size, void *val, int val_size) +{ + d_iov_t key_iov; + d_iov_t val_iov; + int rc; + + if (chk_db->sd_tx_begin) { + rc = chk_db->sd_tx_begin(chk_db); + if (rc != 0) + goto out; + } + + d_iov_set(&key_iov, key, key_size); + d_iov_set(&val_iov, val, val_size); + + rc = chk_db->sd_upsert(chk_db, CHK_DB_TABLE, &key_iov, &val_iov); + + if (chk_db->sd_tx_end) + rc = chk_db->sd_tx_end(chk_db, rc); + +out: + return rc; +} + +static int +chk_db_delete(char *key, int key_size) +{ + d_iov_t key_iov; + int rc; + + if (chk_db->sd_tx_begin) { + rc = chk_db->sd_tx_begin(chk_db); + if (rc != 0) + goto out; + } + + d_iov_set(&key_iov, key, key_size); + + rc = chk_db->sd_delete(chk_db, CHK_DB_TABLE, &key_iov); + + if (chk_db->sd_tx_end) + rc = chk_db->sd_tx_end(chk_db, rc); + +out: + return rc; +} + +static int +chk_db_traverse(sys_db_trav_cb_t cb, void *args) +{ + return chk_db->sd_traverse(chk_db, CHK_DB_TABLE, cb, args); +} + +int +chk_bk_fetch_leader(struct chk_bookmark *cbk) +{ + int rc; + + rc = chk_db_fetch(CHK_BK_LEADER, strlen(CHK_BK_LEADER), cbk, sizeof(*cbk)); + if (rc != 0 && rc != -DER_NONEXIST) + D_ERROR("Failed to fetch leader bookmark on rank %u: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_bk_update_leader(struct chk_bookmark *cbk) +{ + int rc; + + rc = chk_db_update(CHK_BK_LEADER, strlen(CHK_BK_LEADER), cbk, sizeof(*cbk)); + DL_CDEBUG(rc == 0, DLOG_INFO, DLOG_ERR, rc, + "Update leader bookmark on rank %u, status %u, phase %u", + dss_self_rank(), 
cbk->cb_ins_status, cbk->cb_phase); + + return rc; +} + +int +chk_bk_delete_leader(void) +{ + int rc; + + rc = chk_db_delete(CHK_BK_LEADER, strlen(CHK_BK_LEADER)); + if (rc != 0) + D_ERROR("Failed to delete leader bookmark on rank %u: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_bk_fetch_engine(struct chk_bookmark *cbk) +{ + int rc; + + rc = chk_db_fetch(CHK_BK_ENGINE, strlen(CHK_BK_ENGINE), cbk, sizeof(*cbk)); + if (rc != 0 && rc != -DER_NONEXIST) + D_ERROR("Failed to fetch engine bookmark on rank %u: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_bk_update_engine(struct chk_bookmark *cbk) +{ + int rc; + + rc = chk_db_update(CHK_BK_ENGINE, strlen(CHK_BK_ENGINE), cbk, sizeof(*cbk)); + DL_CDEBUG(rc == 0, DLOG_INFO, DLOG_ERR, rc, + "Update engine bookmark on rank %u, status %u, phase %u", + dss_self_rank(), cbk->cb_ins_status, cbk->cb_phase); + + return rc; +} + +int +chk_bk_delete_engine(void) +{ + int rc; + + rc = chk_db_delete(CHK_BK_ENGINE, strlen(CHK_BK_ENGINE)); + if (rc != 0) + D_ERROR("Failed to delete engine bookmark on rank %u: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_bk_fetch_pool(struct chk_bookmark *cbk, char *uuid_str) +{ + int rc; + + rc = chk_db_fetch(uuid_str, strlen(uuid_str), cbk, sizeof(*cbk)); + if (rc != 0 && rc != -DER_NONEXIST) + D_ERROR("Failed to fetch pool %s bookmark on rank %u: "DF_RC"\n", + uuid_str, dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_bk_update_pool(struct chk_bookmark *cbk, char *uuid_str) +{ + int rc; + + rc = chk_db_update(uuid_str, strlen(uuid_str), cbk, sizeof(*cbk)); + DL_CDEBUG(rc == 0, DLOG_INFO, DLOG_ERR, rc, + "Update pool %s bookmark on rank %u, status %u, phase %u", + uuid_str, dss_self_rank(), cbk->cb_ins_status, cbk->cb_phase); + + return rc; +} + +int +chk_bk_delete_pool(char *uuid_str) +{ + int rc; + + rc = chk_db_delete(uuid_str, strlen(uuid_str)); + if (rc != 0) + D_ERROR("Failed to delete pool %s bookmark on 
rank %u: "DF_RC"\n", + uuid_str, dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_prop_fetch(struct chk_property *cpp, d_rank_list_t **rank_list) +{ + d_rank_list_t *ranks = NULL; + int rc; + + rc = chk_db_fetch(CHK_PROPERTY, strlen(CHK_PROPERTY), cpp, sizeof(*cpp)); + if (rc == 0 && cpp->cp_rank_nr != 0 && rank_list != NULL) { + ranks = d_rank_list_alloc(cpp->cp_rank_nr); + if (ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = chk_db_fetch(CHK_RANKS, strlen(CHK_RANKS), ranks->rl_ranks, + sizeof(*ranks->rl_ranks) * ranks->rl_nr); + /* + * CHK_PROPERTY and CHK_RANKS are bound together, either both exist or both not. + * Otherwise there is local corruption. + */ + if (rc == -DER_NONEXIST) { + d_rank_list_free(ranks); + ranks = NULL; + rc = -DER_IO; + } + + if (rc != 0) + goto out; + } + +out: + if (rank_list != NULL) + *rank_list = ranks; + + if (rc != 0 && rc != -DER_NONEXIST) + D_ERROR("Failed to fetch check property on rank %u: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_prop_update(struct chk_property *cpp, d_rank_list_t *rank_list) +{ + d_iov_t key_iov; + d_iov_t val_iov; + int rc; + + if (chk_db->sd_tx_begin) { + rc = chk_db->sd_tx_begin(chk_db); + if (rc != 0) + goto out; + } + + if (cpp->cp_rank_nr != 0 && rank_list != NULL) { + D_ASSERTF(cpp->cp_rank_nr == rank_list->rl_nr, "Invalid rank nr %u/%u\n", + cpp->cp_rank_nr, rank_list->rl_nr); + + d_iov_set(&key_iov, CHK_RANKS, strlen(CHK_RANKS)); + d_iov_set(&val_iov, rank_list->rl_ranks, + sizeof(*rank_list->rl_ranks) * rank_list->rl_nr); + + rc = chk_db->sd_upsert(chk_db, CHK_DB_TABLE, &key_iov, &val_iov); + if (rc != 0) + goto end; + } + + d_iov_set(&key_iov, CHK_PROPERTY, strlen(CHK_PROPERTY)); + d_iov_set(&val_iov, cpp, sizeof(*cpp)); + + rc = chk_db->sd_upsert(chk_db, CHK_DB_TABLE, &key_iov, &val_iov); + +end: + if (chk_db->sd_tx_end) + rc = chk_db->sd_tx_end(chk_db, rc); + +out: + if (rc != 0) + D_ERROR("Failed to update check property on rank %u: "DF_RC"\n", + 
dss_self_rank(), DP_RC(rc)); + + return rc; +} + +int +chk_traverse_pools(sys_db_trav_cb_t cb, void *args) +{ + int rc; + + rc = chk_db_traverse(cb, args); + if (rc == -DER_NONEXIST) + rc = 0; + else if (rc < 0) + D_ERROR("Failed to traverse pools on rank %u for pause: "DF_RC"\n", + dss_self_rank(), DP_RC(rc)); + + return rc; +} + +void +chk_vos_init(void) +{ + chk_db = vos_db_get(); +} + +void +chk_vos_fini(void) +{ + chk_db = NULL; +} diff --git a/src/common/pool_map.c b/src/common/pool_map.c index cdf4bd50b46..4780078a155 100644 --- a/src/common/pool_map.c +++ b/src/common/pool_map.c @@ -421,6 +421,7 @@ pool_buf_attach(struct pool_buf *buf, struct pool_component *comps, buf->pb_domain_nr++; buf->pb_comps[nr] = comps[0]; + buf->pb_comps[nr].co_flags &= ~PO_COMPF_CHK_DONE; D_DEBUG(DB_TRACE, "nr %d %s\n", nr, pool_comp_type2str(comps[0].co_type)); @@ -2549,7 +2550,7 @@ pmap_comp_failed(struct pool_component *comp) { return (comp->co_status == PO_COMP_ST_DOWN) || (comp->co_status == PO_COMP_ST_DOWNOUT && - comp->co_flags == PO_COMPF_DOWN2OUT); + comp->co_flags & PO_COMPF_DOWN2OUT); } static bool @@ -3092,6 +3093,18 @@ pool_map_set_version(struct pool_map *map, uint32_t version) return 0; } +/** + * Bump the pool map version. 
+ */ +uint32_t +pool_map_bump_version(struct pool_map *map) +{ + map->po_version++; + D_DEBUG(DB_TRACE, "Bump pool map to version %u\n", map->po_version); + + return map->po_version; +} + int pool_map_get_failed_cnt(struct pool_map *map, uint32_t domain) { diff --git a/src/common/proc.c b/src/common/proc.c index 6b6f0fe5857..703658afd45 100644 --- a/src/common/proc.c +++ b/src/common/proc.c @@ -204,3 +204,49 @@ crt_proc_daos_prop_t(crt_proc_t proc, crt_proc_op_t proc_op, daos_prop_t **data) return -DER_INVAL; } } + +int +crt_proc_d_sg_list_t(crt_proc_t proc, crt_proc_op_t proc_op, d_sg_list_t *p) +{ + int i; + int rc; + + if (FREEING(proc_op)) { + /* NB: don't need free in crt_proc_d_iov_t() */ + D_FREE(p->sg_iovs); + return 0; + } + + rc = crt_proc_uint32_t(proc, proc_op, &p->sg_nr); + if (unlikely(rc)) + return rc; + + rc = crt_proc_uint32_t(proc, proc_op, &p->sg_nr_out); + if (unlikely(rc)) + return rc; + + if (p->sg_nr == 0) + return 0; + + switch (proc_op) { + case CRT_PROC_DECODE: + D_ALLOC_ARRAY(p->sg_iovs, p->sg_nr); + if (p->sg_iovs == NULL) + return -DER_NOMEM; + /* fall through to fill sg_iovs */ + case CRT_PROC_ENCODE: + for (i = 0; i < p->sg_nr; i++) { + rc = crt_proc_d_iov_t(proc, proc_op, &p->sg_iovs[i]); + if (unlikely(rc)) { + if (DECODING(proc_op)) + D_FREE(p->sg_iovs); + return rc; + } + } + break; + default: + return -DER_INVAL; + } + + return rc; +} diff --git a/src/common/tests_dmg_helpers.c b/src/common/tests_dmg_helpers.c index 949fe8358aa..29826e6df80 100644 --- a/src/common/tests_dmg_helpers.c +++ b/src/common/tests_dmg_helpers.c @@ -524,6 +524,57 @@ print_acl_entry(FILE *outstream, struct daos_prop_entry *acl_entry) return rc; } +static int +parse_dmg_string(struct json_object *obj, const char *key, char **tgt) +{ + struct json_object *tmp; + const char *str; + + if (!json_object_object_get_ex(obj, key, &tmp)) { + D_ERROR("Unable to extract %s from check query result\n", key); + return -DER_INVAL; + } + + str = 
json_object_get_string(tmp); + if (str == NULL) { + D_ERROR("Got empty %s from check query result\n", key); + return -DER_INVAL; + } + + D_STRNDUP(*tgt, str, strlen(str)); + if (*tgt == NULL) { + D_ERROR("Failed to dup %s from check query result\n", key); + return -DER_NOMEM; + } + + return 0; +} + +static int +parse_dmg_uuid(struct json_object *obj, const char *key, uuid_t uuid) +{ + struct json_object *tmp; + const char *str; + int rc; + + if (!json_object_object_get_ex(obj, key, &tmp)) { + D_ERROR("Unable to extract %s from check query result\n", key); + return -DER_INVAL; + } + + str = json_object_get_string(tmp); + if (str == NULL) { + D_ERROR("Got empty %s from check query result\n", key); + return -DER_INVAL; + } + + rc = uuid_parse(str, uuid); + if (rc != 0) + D_ERROR("Failed to parse uuid %s from check query result\n", str); + + return rc; +} + int dmg_pool_set_prop(const char *dmg_config_file, const char *prop_name, const char *prop_value, @@ -560,6 +611,58 @@ dmg_pool_set_prop(const char *dmg_config_file, return rc; } +int +dmg_pool_get_prop(const char *dmg_config_file, const char *label, + const uuid_t uuid, const char *name, char **value) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + int argcount = 0; + char **args = NULL; + struct json_object *dmg_out = NULL; + int len; + int rc = 0; + + D_ASSERT(name != NULL); + D_ASSERT(value != NULL); + + if (label != NULL) { + args = cmd_push_arg(args, &argcount, "%s %s", label, name); + } else { + uuid_unparse_lower(uuid, uuid_str); + args = cmd_push_arg(args, &argcount, "%s %s", uuid_str, name); + } + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = daos_dmg_json_pipe("pool get-prop", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) { + D_ERROR("pool get-prop for %s failed: %d\n", label != NULL ? 
label : uuid_str, rc); + goto out_json; + } + + D_ASSERT(dmg_out != NULL); + + if (json_object_is_type(dmg_out, json_type_null)) { + D_ERROR("Cannot find the property %s for %s\n", + name, label != NULL ? label : uuid_str); + D_GOTO(out_json, rc = -DER_ENOENT); + } + + len = json_object_array_length(dmg_out); + D_ASSERTF(len >= 1, "Invalid prop entries count: %d\n", len); + + rc = parse_dmg_string(json_object_array_get_idx(dmg_out, 0), "value", value); + +out_json: + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + int dmg_pool_create(const char *dmg_config_file, uid_t uid, gid_t gid, const char *grp, @@ -1422,9 +1525,11 @@ int dmg_system_stop_rank(const char *dmg_config_file, d_rank_t rank, int force) struct json_object *dmg_out = NULL; int rc = 0; - args = cmd_push_arg(args, &argcount, " -r %d ", rank); - if (args == NULL) - D_GOTO(out, rc = -DER_NOMEM); + if (rank != CRT_NO_RANK) { + args = cmd_push_arg(args, &argcount, " -r %d ", rank); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } if (force != 0) { args = cmd_push_arg(args, &argcount, " --force "); @@ -1452,22 +1557,81 @@ int dmg_system_start_rank(const char *dmg_config_file, d_rank_t rank) struct json_object *dmg_out = NULL; int rc = 0; + if (rank != CRT_NO_RANK) { + args = cmd_push_arg(args, &argcount, " -r %d ", rank); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("system start", dmg_config_file, + args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg failed\n"); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); +out: + return rc; +} + +int dmg_system_reint_rank(const char *dmg_config_file, d_rank_t rank) +{ + int argcount = 0; + char **args = NULL; + struct json_object *dmg_out = NULL; + int rc = 0; + + if (rank == CRT_NO_RANK) + D_GOTO(out, rc = -DER_INVAL); + args = cmd_push_arg(args, &argcount, " -r %d ", rank); if (args == NULL) D_GOTO(out, rc = 
-DER_NOMEM); - rc = daos_dmg_json_pipe("system start", dmg_config_file, + rc = daos_dmg_json_pipe("system clear-exclude", dmg_config_file, args, argcount, &dmg_out); if (rc != 0) - D_ERROR("dmg failed\n"); + D_ERROR("dmg system clear-exclude failed\n"); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + +int dmg_system_exclude_rank(const char *dmg_config_file, d_rank_t rank) +{ + int argcount = 0; + char **args = NULL; + struct json_object *dmg_out = NULL; + int rc = 0; + + if (rank == CRT_NO_RANK) + D_GOTO(out, rc = -DER_INVAL); + + args = cmd_push_arg(args, &argcount, " -r %d ", rank); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = daos_dmg_json_pipe("system exclude", dmg_config_file, + args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg system exclude failed\n"); if (dmg_out != NULL) json_object_put(dmg_out); cmd_free_args(args, argcount); + out: return rc; } + const char * daos_target_state_enum_to_str(int state) { @@ -1482,3 +1646,459 @@ daos_target_state_enum_to_str(int state) return "Undefined State"; } + +int +dmg_fault_inject(const char *dmg_config_file, uuid_t uuid, bool mgmt, const char *fault) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + + if (mgmt) + args = cmd_push_arg(args, &argcount, " mgmt-svc pool"); + else + args = cmd_push_arg(args, &argcount, " pool-svc"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + uuid_unparse_lower(uuid, uuid_str); + args = cmd_push_arg(args, &argcount, " %s", uuid_str); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + args = cmd_push_arg(args, &argcount, " %s", fault); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = daos_dmg_json_pipe("faults", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg %s fault injection for " DF_UUID " with %s got failure: %d\n", + mgmt ? 
"mgmt" : "pool", DP_UUID(uuid), fault, rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + +int +dmg_check_switch(const char *dmg_config_file, bool enable) +{ + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + + if (enable) + args = cmd_push_arg(args, &argcount, " enable"); + else + args = cmd_push_arg(args, &argcount, " disable"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = daos_dmg_json_pipe("check", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check switch to %s failed: %d\n", enable ? "enable" : "disable", rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + +int +dmg_check_start(const char *dmg_config_file, uint32_t flags, uint32_t pool_nr, uuid_t uuids[], + const char *policies) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + int i; + + if (flags & TCSF_DRYRUN) { + args = cmd_push_arg(args, &argcount, " -n"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_RESET) { + args = cmd_push_arg(args, &argcount, " -r"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_FAILOUT) { + args = cmd_push_arg(args, &argcount, " --failout=on"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_AUTO) { + args = cmd_push_arg(args, &argcount, " --auto=on"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_ORPHAN) { + args = cmd_push_arg(args, &argcount, " -O"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_NO_FAILOUT) { + args = cmd_push_arg(args, &argcount, " --failout=off"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCSF_NO_AUTO) { + args = cmd_push_arg(args, &argcount, " --auto=off"); + if (args == 
NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (policies != NULL) { + args = cmd_push_arg(args, &argcount, " --policies=%s", policies); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + for (i = 0; i < pool_nr; i++) { + uuid_unparse_lower(uuids[i], uuid_str); + args = cmd_push_arg(args, &argcount, " %s", uuid_str); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("check start", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check start with flags %x, policies %s failed: %d\n", flags, + policies != NULL ? policies : "(null)", rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + +out: + cmd_free_args(args, argcount); + + return rc; +} + +int +dmg_check_stop(const char *dmg_config_file, uint32_t pool_nr, uuid_t uuids[]) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + int i; + + for (i = 0; i < pool_nr; i++) { + uuid_unparse_lower(uuids[i], uuid_str); + args = cmd_push_arg(args, &argcount, " %s", uuid_str); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("check stop", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check stop failed: %d\n", rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + +static int +check_query_reports_cmp(const void *p1, const void *p2) +{ + const struct daos_check_report_info *dcri1 = p1; + const struct daos_check_report_info *dcri2 = p2; + + if (dcri1->dcri_class > dcri2->dcri_class) + return 1; + + if (dcri1->dcri_class < dcri2->dcri_class) + return -1; + + return 0; +} + +static int +parse_check_query_pool(struct json_object *obj, uuid_t uuid, struct daos_check_info *dci) +{ + struct daos_check_pool_info *dcpi; + struct json_object *pool; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc; + + uuid_unparse_lower(uuid, uuid_str); + + /* The queried pool 
may not exist. */ + if (!json_object_object_get_ex(obj, uuid_str, &pool)) { + D_WARN("Do not find the pool %s in check query result, may not exist\n", uuid_str); + return 0; + } + + dcpi = &dci->dci_pools[dci->dci_pool_nr]; + + rc = parse_dmg_uuid(pool, "uuid", dcpi->dcpi_uuid); + if (rc != 0) + return rc; + + rc = parse_dmg_string(pool, "status", &dcpi->dcpi_status); + if (rc != 0) + return rc; + + rc = parse_dmg_string(pool, "phase", &dcpi->dcpi_phase); + if (rc == 0) + dci->dci_pool_nr++; + + return rc; +} + +static int +parse_check_query_report(struct json_object *obj, struct daos_check_report_info *dcri) +{ + struct json_object *tmp; + int rc; + int i; + + rc = parse_dmg_uuid(obj, "pool_uuid", dcri->dcri_uuid); + if (rc != 0) + return rc; + + if (!json_object_object_get_ex(obj, "seq", &tmp)) { + D_ERROR("Unable to extract seq for pool " DF_UUID " from check query result\n", + DP_UUID(dcri->dcri_uuid)); + return -DER_INVAL; + } + + dcri->dcri_seq = json_object_get_int64(tmp); + + if (!json_object_object_get_ex(obj, "class", &tmp)) { + D_ERROR("Unable to extract class for pool " DF_UUID " from check query result\n", + DP_UUID(dcri->dcri_uuid)); + return -DER_INVAL; + } + + dcri->dcri_class = json_object_get_int(tmp); + + if (!json_object_object_get_ex(obj, "action", &tmp)) { + D_ERROR("Unable to extract action for pool " DF_UUID " from check query result\n", + DP_UUID(dcri->dcri_uuid)); + return -DER_INVAL; + } + + dcri->dcri_act = json_object_get_int(tmp); + + if (!json_object_object_get_ex(obj, "result", &tmp)) + dcri->dcri_result = 0; + else + dcri->dcri_result = json_object_get_int(tmp); + + /* Not interaction. 
*/ + if (!json_object_object_get_ex(obj, "act_choices", &tmp)) + return 0; + + dcri->dcri_option_nr = json_object_array_length(tmp); + D_ASSERTF(dcri->dcri_option_nr > 0, + "Invalid options count for pool " DF_UUID " in check query result: %d\n", + DP_UUID(dcri->dcri_uuid), dcri->dcri_option_nr); + + for (i = 0; i < dcri->dcri_option_nr; i++) + dcri->dcri_options[i] = json_object_get_int(json_object_array_get_idx(tmp, i)); + + return 0; +} + +static int +parse_check_query_info(struct json_object *query_output, uint32_t pool_nr, uuid_t uuids[], + struct daos_check_info *dci) +{ + struct json_object *obj; + int i; + int rc; + + rc = parse_dmg_string(query_output, "status", &dci->dci_status); + if (rc != 0) + return rc; + + rc = parse_dmg_string(query_output, "scan_phase", &dci->dci_phase); + if (rc != 0) + return rc; + + dci->dci_pool_nr = 0; + + if (pool_nr <= 0) + goto reports; + + if (!json_object_object_get_ex(query_output, "pools", &obj)) { + D_ERROR("Unable to extract pools from check query result\n"); + return -DER_INVAL; + } + + if (json_object_is_type(obj, json_type_null)) + goto reports; + + D_ALLOC_ARRAY(dci->dci_pools, pool_nr); + if (dci->dci_pools == NULL) { + D_ERROR("Failed to allocate pools (len %d) for check query result\n", pool_nr); + return -DER_NOMEM; + } + + for (i = 0; i < pool_nr; i++) { + rc = parse_check_query_pool(obj, uuids[i], dci); + if (rc != 0) + return rc; + } + +reports: + if (!json_object_object_get_ex(query_output, "reports", &obj)) { + D_ERROR("Unable to extract reports from check query result\n"); + return -DER_INVAL; + } + + if (json_object_is_type(obj, json_type_null)) { + dci->dci_report_nr = 0; + return 0; + } + + dci->dci_report_nr = json_object_array_length(obj); + D_ASSERTF(dci->dci_report_nr > 0, + "Invalid reports count pool in check query result: %d\n", dci->dci_report_nr); + + D_ALLOC_ARRAY(dci->dci_reports, dci->dci_report_nr); + if (dci->dci_reports == NULL) { + D_ERROR("Failed to allocate reports (len %d) for check 
query result\n", + dci->dci_report_nr); + return -DER_NOMEM; + } + + for (i = 0; i < dci->dci_report_nr; i++) { + rc = parse_check_query_report(json_object_array_get_idx(obj, i), + &dci->dci_reports[i]); + if (rc != 0) + return rc; + } + + /* Sort the inconsistency reports for easy verification. */ + if (dci->dci_report_nr > 1) + qsort(dci->dci_reports, dci->dci_report_nr, sizeof(dci->dci_reports[0]), + check_query_reports_cmp); + + return 0; +} + +int +dmg_check_query(const char *dmg_config_file, uint32_t pool_nr, uuid_t uuids[], + struct daos_check_info *dci) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + int i; + + for (i = 0; i < pool_nr; i++) { + uuid_unparse_lower(uuids[i], uuid_str); + args = cmd_push_arg(args, &argcount, " %s", uuid_str); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("check query", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check query failed: %d\n", rc); + else + rc = parse_check_query_info(dmg_out, pool_nr, uuids, dci); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} + +int +dmg_check_repair(const char *dmg_config_file, uint64_t seq, uint32_t opt, bool for_all) +{ + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + + args = cmd_push_arg(args, &argcount, " %Lu %u", seq, opt); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + if (for_all) { + args = cmd_push_arg(args, &argcount, " -f"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("check repair", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check repair with seq %lu, opt %u, for_all %s, failed: %d\n", + (unsigned long)seq, opt, for_all ? 
"yes" : "no", rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} +int +dmg_check_set_policy(const char *dmg_config_file, uint32_t flags, const char *policies) +{ + char **args = NULL; + struct json_object *dmg_out = NULL; + int argcount = 0; + int rc = 0; + + if (flags & TCPF_RESET) { + args = cmd_push_arg(args, &argcount, " -d"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + if (flags & TCPF_INTERACT) { + args = cmd_push_arg(args, &argcount, " -a"); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = daos_dmg_json_pipe("check set-policy", dmg_config_file, args, argcount, &dmg_out); + if (rc != 0) + D_ERROR("dmg check set-policy with flags %x, policies %s failed: %d\n", flags, + policies != NULL ? policies : "(null)", rc); + + if (dmg_out != NULL) + json_object_put(dmg_out); + + cmd_free_args(args, argcount); + +out: + return rc; +} diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 593ce26c11a..2f0e6f7b221 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -246,7 +246,7 @@ ds_cont_bcast_create(crt_context_t ctx, struct cont_svc *svc, crt_opcode_t opcode, crt_rpc_t **rpc) { return ds_pool_bcast_create(ctx, svc->cs_pool, DAOS_CONT_MODULE, opcode, - DAOS_CONT_VERSION, rpc, NULL, NULL); + DAOS_CONT_VERSION, rpc, NULL, NULL, NULL); } void @@ -3554,7 +3554,7 @@ capas_can_set_prop(struct cont *cont, uint64_t sec_capas, /* Sanity check set-prop label, and update cs_uuids KVS */ static int -check_set_prop_label(struct rdb_tx *tx, struct ds_pool *pool, struct cont *cont, +check_set_prop_label(struct rdb_tx *tx, struct cont *cont, daos_prop_t *prop_in, daos_prop_t *prop_old) { struct daos_prop_entry *in_ent; @@ -3591,17 +3591,37 @@ check_set_prop_label(struct rdb_tx *tx, struct ds_pool *pool, struct cont *cont, return -DER_INVAL; } + d_iov_set(&val, match_cuuid, sizeof(uuid_t)); + /* If specified label matches existing 
label, nothing more to do */ old_ent = daos_prop_entry_get(prop_old, DAOS_PROP_CO_LABEL); if (old_ent) { old_lbl = old_ent->dpe_str; if (strncmp(old_lbl, in_lbl, DAOS_PROP_LABEL_MAX_LEN) == 0) return 0; + + d_iov_set(&key, old_lbl, strnlen(old_lbl, DAOS_PROP_MAX_LABEL_BUF_LEN)); + rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); + if (rc == 0) { + if (uuid_compare(cont->c_uuid, match_cuuid) != 0) { + D_ERROR("The old label %s is used for different container " + DF_UUIDF", cannot be removed when set label %s for " + DF_UUIDF"\n", old_lbl, DP_UUID(match_cuuid), + in_lbl, DP_UUID(cont->c_uuid)); + return -DER_NO_PERM; + } + } else if (rc == -DER_NONEXIST) { + /* The old label does not exist, do not need to remove from cs_uuids. */ + old_lbl = NULL; + } else { + D_ERROR(DF_UUID": lookup old label (%s) failed: "DF_RC"\n", + DP_UUID(cont->c_uuid), old_lbl, DP_RC(rc)); + return rc; + } } /* Insert new label into cs_uuids KVS, fail if already in use */ d_iov_set(&key, in_lbl, strnlen(in_lbl, DAOS_PROP_MAX_LABEL_BUF_LEN)); - d_iov_set(&val, match_cuuid, sizeof(uuid_t)); rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); if (rc != -DER_NONEXIST) { if (rc != 0) { @@ -3623,6 +3643,9 @@ check_set_prop_label(struct rdb_tx *tx, struct ds_pool *pool, struct cont *cont, D_DEBUG(DB_MD, DF_UUID": inserted new label in cs_uuids KVS: %s\n", DP_UUID(cont->c_uuid), in_lbl); + if (old_lbl == NULL) + return 0; + /* Remove old label from cs_uuids KVS, if applicable */ if (old_lbl == NULL) return 0; @@ -3691,10 +3714,13 @@ set_prop(struct rdb_tx *tx, struct ds_pool *pool, D_GOTO(out, rc = -DER_NOMEM); /* If label property given, run sanity checks & update cs_uuids */ - rc = check_set_prop_label(tx, pool, cont, prop_in, prop_old); + rc = check_set_prop_label(tx, cont, prop_in, prop_old); if (rc != 0) goto out; + if (DAOS_FAIL_CHECK(DAOS_CHK_CONT_BAD_LABEL)) + goto out; + rc = cont_prop_write(tx, &cont->c_prop, prop_in, false); if (rc != 0) D_GOTO(out, rc); @@ -5868,3 
+5894,174 @@ ds_cont_hdl_rdb_lookup(uuid_t pool_uuid, uuid_t cont_hdl_uuid, struct container_ cont_svc_put_leader(svc); return rc; } + +/* + * Check whether the specified container exists in the container service or not. + * If yes, return the container label via the @prop. + */ +int +ds_cont_existence_check(struct cont_svc *svc, uuid_t uuid, daos_prop_t **prop) +{ + struct cont *cont = NULL; + daos_prop_t *tmp = NULL; + struct rdb_tx tx; + int rc; + + rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); + if (rc != 0) + goto out; + + ABT_rwlock_rdlock(svc->cs_lock); + rc = cont_lookup(&tx, svc, uuid, &cont); + if (rc != 0) + goto out_tx; + + rc = cont_prop_read(&tx, cont, DAOS_CO_QUERY_PROP_LABEL, &tmp, true); + if (rc != 0) + D_GOTO(out_cont, rc = (rc == -DER_NONEXIST ? 0 : rc)); + + if (tmp->dpp_entries[0].dpe_str == NULL || + strncmp(DAOS_PROP_NO_CO_LABEL, tmp->dpp_entries[0].dpe_str, + DAOS_PROP_LABEL_MAX_LEN) == 0) + daos_prop_free(tmp); + else + *prop = tmp; + +out_cont: + cont_put(cont); +out_tx: + ABT_rwlock_unlock(svc->cs_lock); + rdb_tx_end(&tx); +out: + return rc; +} + +/* + * Destroy the orphan container that is not registered + * to the container service (then without open handle). + */ +int +ds_cont_destroy_orphan(struct cont_svc *svc, uuid_t uuid) +{ + struct rdb_tx tx; + d_iov_t key; + d_iov_t tmp; + int rc; + + rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); + if (rc != 0) + goto out; + + ABT_rwlock_rdlock(svc->cs_lock); + d_iov_set(&key, uuid, sizeof(uuid_t)); + d_iov_set(&tmp, NULL, 0); + rc = rdb_tx_lookup(&tx, &svc->cs_conts, &key, &tmp); + ABT_rwlock_unlock(svc->cs_lock); + rdb_tx_end(&tx); + if (rc == 0) + /* Forbid to destroy non-orphan container. 
*/ + D_GOTO(out, rc = -DER_BUSY); + + rc = cont_destroy_bcast(dss_get_module_info()->dmi_ctx, svc, uuid); + if (rc == 0) + cont_ec_agg_delete(svc, uuid); + +out: + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_CONT" destroy orphan container: "DF_RC"\n", + DP_CONT(svc->cs_pool->sp_uuid, uuid), DP_RC(rc)); + + return rc; +} + +int +ds_cont_iterate_labels(struct cont_svc *svc, rdb_iterate_cb_t cb, void *arg) +{ + struct rdb_tx tx; + int rc; + + rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); + if (rc == 0) { + ABT_rwlock_rdlock(svc->cs_lock); + rc = rdb_tx_iterate(&tx, &svc->cs_uuids, false /* !backward */, cb, arg); + ABT_rwlock_unlock(svc->cs_lock); + rdb_tx_end(&tx); + } + + return rc; +} + +int +ds_cont_set_label(struct cont_svc *svc, uuid_t uuid, daos_prop_t *prop_in, + daos_prop_t *prop_old, bool for_svc) +{ + struct ds_pool *pool = svc->cs_pool; + daos_prop_t *prop_cur = NULL; + daos_prop_t *prop_iv = NULL; + struct cont *cont = NULL; + struct daos_prop_entry *entry; + struct rdb_tx tx; + int rc = 0; + + D_ASSERT(prop_in != NULL); + + if (!daos_prop_valid(prop_in, false, true)) + D_GOTO(out, rc = -DER_INVAL); + + rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); + if (rc != 0) + goto out; + + ABT_rwlock_wrlock(svc->cs_lock); + rc = cont_lookup(&tx, svc, uuid, &cont); + if (rc != 0) + goto out_tx; + + if (!for_svc) { + /* Read all props for prop IV update */ + rc = cont_prop_read(&tx, cont, DAOS_CO_QUERY_PROP_ALL, &prop_cur, true); + if (rc != 0) + D_GOTO(out_cont, rc); + + D_ASSERT(prop_cur != NULL); + + entry = daos_prop_entry_get(prop_cur, DAOS_PROP_CO_LABEL); + D_ASSERT(entry != NULL); + + /* If specified label matches existing label, do nothing. 
*/ + if (strncmp(entry->dpe_str, prop_in->dpp_entries[0].dpe_str, + DAOS_PROP_LABEL_MAX_LEN) == 0) + D_GOTO(out_cont, rc = 1); + + prop_iv = daos_prop_merge(prop_cur, prop_in); + if (prop_iv == NULL) + D_GOTO(out_cont, rc = -DER_NOMEM); + + rc = cont_prop_write(&tx, &cont->c_prop, prop_in, false); + if (rc != 0) + D_GOTO(out_cont, rc); + + /* Update prop IV with merged prop */ + rc = cont_iv_prop_update(pool->sp_iv_ns, uuid, prop_iv, true); + } else { + rc = check_set_prop_label(&tx, cont, prop_in, prop_old); + } + + if (rc == 0) + rc = rdb_tx_commit(&tx); + +out_cont: + cont_put(cont); +out_tx: + ABT_rwlock_unlock(svc->cs_lock); + rdb_tx_end(&tx); +out: + daos_prop_free(prop_iv); + daos_prop_free(prop_cur); + + D_CDEBUG(rc < 0, DLOG_INFO, DLOG_ERR, + "set label %s for container "DF_UUIDF", svc %s: rc = %d\n", + prop_in->dpp_entries[0].dpe_str, DP_UUID(uuid), for_svc ? "yes" : "no", rc); + + return rc > 0 ? 0 : rc; +} diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 048a4f00abc..a6c52f87d7b 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -895,14 +895,16 @@ cont_child_start(struct ds_pool_child *pool_child, const uuid_t co_uuid, DP_CONT(pool_child->spc_uuid, co_uuid), tgt_id); rc = -DER_SHUTDOWN; } else if (!cont_child_started(cont_child)) { - rc = cont_start_agg(cont_child); - if (rc != 0) - goto out; - - rc = dtx_cont_register(cont_child); - if (rc != 0) { - cont_stop_agg(cont_child); - goto out; + if (!engine_in_check()) { + rc = cont_start_agg(cont_child); + if (rc != 0) + goto out; + + rc = dtx_cont_register(cont_child); + if (rc != 0) { + cont_stop_agg(cont_child); + goto out; + } } d_list_add_tail(&cont_child->sc_link, &pool_child->spc_cont_list); @@ -950,6 +952,58 @@ ds_cont_child_start_all(struct ds_pool_child *pool_child) return rc; } +static int +cont_child_chk_post_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *iter_param, void *data, unsigned *acts) +{ + 
struct dsm_tls *tls = dsm_tls_get(); + struct ds_pool_child *pool_child = data; + struct ds_cont_child *cont_child = NULL; + int rc = 0; + + /* The container shard must has been opened. */ + rc = cont_child_lookup(tls->dt_cont_cache, entry->ie_couuid, + pool_child->spc_uuid, false /* create */, &cont_child); + if (rc != 0) + goto out; + + if (cont_child->sc_stopping || !cont_child_started(cont_child)) + D_GOTO(out, rc = -DER_SHUTDOWN); + + rc = cont_start_agg(cont_child); + if (rc != 0) + goto out; + + rc = dtx_cont_register(cont_child); + +out: + if (cont_child != NULL) { + if (rc != 0) + cont_stop_agg(cont_child); + + ds_cont_child_put(cont_child); + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "[%d]: Post handle container "DF_CONTF" start after DAOS check: "DF_RC"\n", + dss_get_module_info()->dmi_tgt_id, + DP_CONT(pool_child->spc_uuid, entry->ie_couuid), DP_RC(rc)); + + return rc; +} + +int +ds_cont_chk_post(struct ds_pool_child *pool_child) +{ + vos_iter_param_t iter_param = { 0 }; + struct vos_iter_anchors anchors = { 0 }; + + iter_param.ip_hdl = pool_child->spc_hdl; + + return vos_iterate(&iter_param, VOS_ITER_COUUID, false, &anchors, + cont_child_chk_post_cb, NULL, (void *)pool_child, NULL); +} + /* ds_cont_hdl ****************************************************************/ static inline struct ds_cont_hdl * @@ -1290,7 +1344,9 @@ ds_cont_tgt_destroy_handler(crt_rpc_t *rpc) D_DEBUG(DB_MD, DF_CONT": handling rpc %p\n", DP_CONT(in->tdi_pool_uuid, in->tdi_uuid), rpc); - rc = ds_cont_tgt_destroy(in->tdi_pool_uuid, in->tdi_uuid); + if (!DAOS_FAIL_CHECK(DAOS_CHK_CONT_ORPHAN)) + rc = ds_cont_tgt_destroy(in->tdi_pool_uuid, in->tdi_uuid); + out->tdo_rc = (rc == 0 ? 
0 : 1); D_DEBUG(DB_MD, DF_CONT ": replying rpc: %p %d " DF_RC "\n", DP_CONT(in->tdi_pool_uuid, in->tdi_uuid), rpc, out->tdo_rc, DP_RC(rc)); diff --git a/src/control/README.md b/src/control/README.md index d858b89652c..7ebbb9659ea 100644 --- a/src/control/README.md +++ b/src/control/README.md @@ -12,8 +12,8 @@ DAOS Data Plane (Engine) processes that run on the same host. ## Code Organization -The control directory contains a "cmd" subdirectory for server, agent, and dmg -applications. These applications import the control API +The control directory contains a "cmd" subdirectory for server, agent, ddb, and +dmg applications. These applications import the control API (`src/control/lib/control`) or server packages along with peripheral shared packages common, drpc, fault, logging, and security where necessary to provide the given features. diff --git a/src/control/SConscript b/src/control/SConscript index ff397f1c91e..4b26e001a45 100644 --- a/src/control/SConscript +++ b/src/control/SConscript @@ -19,6 +19,7 @@ def get_build_tags(benv): if is_firmware_mgmt_build(benv): tags.append("firmware") if not is_release_build(benv): + tags.append("fault_injection") tags.append("pprof") else: tags.append("release") @@ -160,7 +161,7 @@ def scons(): install_go_bin(senv, "daos_server") aenv = denv.Clone() - aenv.require('spdk', 'pmdk', 'ofi') + aenv.require('spdk', 'pmdk', 'ofi', 'argobots') aenv.AppendUnique(LINKFLAGS=["-Wl,--no-as-needed"]) aenv.Replace(RPATH=[]) @@ -168,6 +169,7 @@ def scons(): "-L$BUILD_DIR/src/gurt " "-L$BUILD_DIR/src/cart " "-L$BUILD_DIR/src/common " + "-L$BUILD_DIR/src/ddb " "-L$SPDK_PREFIX/lib " "-L$OFI_PREFIX/lib $_RPATH") # Explicitly link RTE & SPDK libs for CGO access @@ -187,6 +189,15 @@ def scons(): print("(EXPERIMENTAL) Building DAOS firmware tools") install_go_bin(aenv, "daos_firmware_helper", libs=['nvme_control']) + ddb_env = aenv.Clone() + ddb_env.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + ddb_env.d_add_rpaths(None, True, True) + + # 
Add vos and dependent libs for ddb + ddb_env.AppendENVPath("CGO_LDFLAGS", " -lvos -ldaos_common_pmem -lpmem " + "-labt -lgurt -luuid -lbio -lcart", sep=" ") + install_go_bin(ddb_env, "ddb", ['ddb']) + if __name__ == "SCons.Script": scons() diff --git a/src/control/cmd/daos/fi.go b/src/control/cmd/daos/fi.go new file mode 100644 index 00000000000..b47f463d0ce --- /dev/null +++ b/src/control/cmd/daos/fi.go @@ -0,0 +1,160 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build fault_injection +// +build fault_injection + +package main + +/* +#include +#include +*/ +import "C" +import ( + "fmt" + "strconv" + "strings" + + "github.com/pkg/errors" +) + +type faultsCmdRoot struct { + Faults faultsCmd `command:"faults" description:"Inject server faults"` +} + +type faultsCmd struct { + Param debugFaultCmd `command:"set-param" description:"Use daos_debug_set_params() to set fault parameters"` + Container containerFaultCmd `command:"container" description:"Inject container fault"` +} + +type faultFrequency uint64 + +func (ff *faultFrequency) UnmarshalFlag(fv string) error { + switch strings.ToLower(strings.TrimSpace(fv)) { + case "always": + *ff = faultFrequency(C.DAOS_FAIL_ALWAYS) + case "once": + *ff = faultFrequency(C.DAOS_FAIL_ONCE) + default: + v, err := strconv.ParseUint(fv, 10, 16) + if err != nil { + errors.Errorf("invalid fault frequency %q", fv) + } + *ff = faultFrequency(C.DAOS_FAIL_SOME | C.uint64_t(v)) + } + return nil +} + +func (ff faultFrequency) HasSome() (uint64, bool) { + if ff&C.DAOS_FAIL_SOME != 0 { + return uint64(ff &^ C.DAOS_FAIL_SOME), true + } + return 0, false +} + +type faultLocation uint64 + +func (fl *faultLocation) UnmarshalFlag(fv string) error { + // Ugh. Seems like there should be a more clever way to do this... 
+ switch strings.TrimSpace(fv) { + case "DAOS_CHK_CONT_ORPHAN": + *fl = faultLocation(C.DAOS_CHK_CONT_ORPHAN) + case "DAOS_CHK_CONT_BAD_LABEL": + *fl = faultLocation(C.DAOS_CHK_CONT_BAD_LABEL) + default: + return errors.Errorf("unhandled fault location %q", fv) + } + + return nil +} + +type faultRank uint32 + +func (fr *faultRank) UnmarshalFlag(fv string) error { + if fv == strconv.FormatUint(uint64(C.CRT_NO_RANK), 10) || fv == "-1" { + *fr = faultRank(C.CRT_NO_RANK) + return nil + } + + v, err := strconv.ParseUint(fv, 10, 32) + if err != nil { + return errors.Errorf("invalid rank %q", fv) + } + *fr = faultRank(v) + return nil +} + +type faultInjectionCmd struct { + daosCmd + + Rank faultRank `short:"r" long:"rank" description:"Rank to inject fault on" default:"4294967295"` + Frequency faultFrequency `short:"f" long:"frequency" description:"Fault injection frequency" choices:"always,once" default:"once"` + Location faultLocation `short:"l" long:"location" description:"Fault injection location" required:"1"` +} + +func (cmd *faultInjectionCmd) setParams() error { + faultMask := C.uint64_t(cmd.Location) + if someVal, hasSome := cmd.Frequency.HasSome(); hasSome { + cmd.Debugf("setting fault injection frequency to %d", someVal) + rc := C.daos_debug_set_params(nil, C.d_rank_t(cmd.Rank), C.DMG_KEY_FAIL_NUM, C.uint64_t(someVal), 0, nil) + if err := daosError(rc); err != nil { + return errors.Wrap(err, "failed to set fault injection frequency") + } + faultMask |= C.DAOS_FAIL_SOME + } else { + faultMask |= C.uint64_t(cmd.Frequency) + } + + rankMsg := "all ranks" + if cmd.Rank != C.CRT_NO_RANK { + rankMsg = fmt.Sprintf("rank %d", cmd.Rank) + } + cmd.Debugf("injecting fault %d on %s", faultMask, rankMsg) + rc := C.daos_debug_set_params(nil, C.d_rank_t(cmd.Rank), C.DMG_KEY_FAIL_LOC, faultMask, 0, nil) + if err := daosError(rc); err != nil { + return errors.Wrap(err, "failed to set fault injection") + } + return nil +} + +type debugFaultCmd struct { + faultInjectionCmd +} + 
+func (cmd *debugFaultCmd) Execute(_ []string) error { + return cmd.setParams() +} + +type containerFaultCmd struct { + existingContainerCmd + faultInjectionCmd +} + +func (cmd *containerFaultCmd) Execute(_ []string) error { + if err := cmd.setParams(); err != nil { + return err + } + + // Quick hack; find a more maintainable solution for this later. + switch cmd.Location { + case faultLocation(C.DAOS_CHK_CONT_ORPHAN): + cdCmd := containerDestroyCmd{ + existingContainerCmd: cmd.existingContainerCmd, + } + cdCmd.Logger = cmd.Logger + return cdCmd.Execute(nil) + case faultLocation(C.DAOS_CHK_CONT_BAD_LABEL): + cspCmd := containerSetPropCmd{ + existingContainerCmd: cmd.existingContainerCmd, + } + if err := cspCmd.Args.Props.UnmarshalFlag("label:new-label"); err != nil { + return err + } + cspCmd.Logger = cmd.Logger + return cspCmd.Execute(nil) + } + return nil +} diff --git a/src/control/cmd/daos/fi_disabled.go b/src/control/cmd/daos/fi_disabled.go new file mode 100644 index 00000000000..fb2aa170f27 --- /dev/null +++ b/src/control/cmd/daos/fi_disabled.go @@ -0,0 +1,11 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build !fault_injection +// +build !fault_injection + +package main + +type faultsCmdRoot struct{} diff --git a/src/control/cmd/daos/main.go b/src/control/cmd/daos/main.go index a38632426c8..e15dec0254e 100644 --- a/src/control/cmd/daos/main.go +++ b/src/control/cmd/daos/main.go @@ -34,6 +34,7 @@ type cliOptions struct { System systemCmd `command:"system" alias:"sys" description:"DAOS system operations"` Version versionCmd `command:"version" description:"Print daos version"` ManPage cmdutil.ManCmd `command:"manpage" hidden:"true"` + faultsCmdRoot } type versionCmd struct { diff --git a/src/control/cmd/ddb/command_completers.go b/src/control/cmd/ddb/command_completers.go new file mode 100644 index 00000000000..51065826a8f --- /dev/null +++ b/src/control/cmd/ddb/command_completers.go @@ -0,0 +1,53 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "io/fs" + "path/filepath" + "strings" +) + +const ( + defMntPrefix = "/mnt" +) + +func listDir(match string) (result []string) { + if strings.HasSuffix(match, "vos-") { + match = filepath.Dir(match) + } + filepath.Walk(match, func(path string, info fs.FileInfo, err error) error { + if err != nil { + /* ignore error */ + return nil + } + if strings.Contains(path, "vos-") { + result = append(result, path) + } + return nil + }) + return +} + +func openCompleter(prefix string, args []string) []string { + suggestions := []string{"-h", "-w", "--write_mode"} + suggestions = append(suggestions, listDir(defMntPrefix)...) 
+ + if len(prefix) > 0 { + var newSuggestions []string + for _, s := range suggestions { + if strings.HasPrefix(s, prefix) { + newSuggestions = append(newSuggestions, strings.Trim(s, prefix)) + } + } + suggestions = newSuggestions + + } + + return suggestions + +} diff --git a/src/control/cmd/ddb/commands_wrapper.go b/src/control/cmd/ddb/commands_wrapper.go new file mode 100644 index 00000000000..4b641169361 --- /dev/null +++ b/src/control/cmd/ddb/commands_wrapper.go @@ -0,0 +1,225 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "runtime" + "unsafe" + + "github.com/daos-stack/daos/src/control/lib/daos" +) + +/* + #cgo CFLAGS: -I${SRCDIR}/../../../ddb/ + #cgo LDFLAGS: -lddb -lgurt + + #include + #include +*/ +import "C" + +func daosError(rc C.int) error { + if rc != 0 { + return daos.Status(rc) + } + return nil +} + +func freeString(s *C.char) { + C.free(unsafe.Pointer(s)) +} + +// InitDdb initializes the ddb context and returns a closure to finalize it. 
+func InitDdb() (*DdbContext, func(), error) { + // Must lock to OS thread because vos init/fini uses ABT init and finalize which must be called on the same thread + runtime.LockOSThread() + + if err := daosError(C.ddb_init()); err != nil { + runtime.UnlockOSThread() + return nil, nil, err + } + + ctx := &DdbContext{} + C.ddb_ctx_init(&ctx.ctx) // Initialize with ctx default values + + return ctx, func() { + C.ddb_fini() + runtime.UnlockOSThread() + }, nil +} + +// DdbContext structure for wrapping the C code context structure +type DdbContext struct { + ctx C.struct_ddb_ctx +} + +func ddbPoolIsOpen(ctx *DdbContext) bool { + return bool(C.ddb_pool_is_open(&ctx.ctx)) +} + +func ddbLs(ctx *DdbContext, path string, recursive bool, details bool) error { + /* Set up the options */ + options := C.struct_ls_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.recursive = C.bool(recursive) + options.details = C.bool(details) + /* Run the c code command */ + return daosError(C.ddb_run_ls(&ctx.ctx, &options)) +} + +func ddbOpen(ctx *DdbContext, path string, write_mode bool) error { + /* Set up the options */ + options := C.struct_open_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.write_mode = C.bool(write_mode) + /* Run the c code command */ + return daosError(C.ddb_run_open(&ctx.ctx, &options)) +} + +func ddbVersion(ctx *DdbContext) error { + /* Run the c code command */ + return daosError(C.ddb_run_version(&ctx.ctx)) +} + +func ddbClose(ctx *DdbContext) error { + /* Run the c code command */ + return daosError(C.ddb_run_close(&ctx.ctx)) +} + +func ddbSuperblockDump(ctx *DdbContext) error { + /* Run the c code command */ + return daosError(C.ddb_run_superblock_dump(&ctx.ctx)) +} + +func ddbValueDump(ctx *DdbContext, path string, dst string) error { + /* Set up the options */ + options := C.struct_value_dump_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.dst = 
C.CString(dst) + defer freeString(options.dst) + /* Run the c code command */ + return daosError(C.ddb_run_value_dump(&ctx.ctx, &options)) +} + +func ddbRm(ctx *DdbContext, path string) error { + /* Set up the options */ + options := C.struct_rm_options{} + options.path = C.CString(path) + defer freeString(options.path) + /* Run the c code command */ + return daosError(C.ddb_run_rm(&ctx.ctx, &options)) +} + +func ddbValueLoad(ctx *DdbContext, src string, dst string) error { + /* Set up the options */ + options := C.struct_value_load_options{} + options.src = C.CString(src) + defer freeString(options.src) + options.dst = C.CString(dst) + defer freeString(options.dst) + /* Run the c code command */ + return daosError(C.ddb_run_value_load(&ctx.ctx, &options)) +} + +func ddbIlogDump(ctx *DdbContext, path string) error { + /* Set up the options */ + options := C.struct_ilog_dump_options{} + options.path = C.CString(path) + defer freeString(options.path) + /* Run the c code command */ + return daosError(C.ddb_run_ilog_dump(&ctx.ctx, &options)) +} + +func ddbIlogCommit(ctx *DdbContext, path string) error { + /* Set up the options */ + options := C.struct_ilog_commit_options{} + options.path = C.CString(path) + defer freeString(options.path) + /* Run the c code command */ + return daosError(C.ddb_run_ilog_commit(&ctx.ctx, &options)) +} + +func ddbIlogClear(ctx *DdbContext, path string) error { + /* Set up the options */ + options := C.struct_ilog_clear_options{} + options.path = C.CString(path) + defer freeString(options.path) + /* Run the c code command */ + return daosError(C.ddb_run_ilog_clear(&ctx.ctx, &options)) +} + +func ddbDtxDump(ctx *DdbContext, path string, active bool, committed bool) error { + /* Set up the options */ + options := C.struct_dtx_dump_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.active = C.bool(active) + options.committed = C.bool(committed) + /* Run the c code command */ + return 
daosError(C.ddb_run_dtx_dump(&ctx.ctx, &options)) +} + +func ddbDtxCmtClear(ctx *DdbContext, path string) error { + /* Set up the options */ + options := C.struct_dtx_cmt_clear_options{} + options.path = C.CString(path) + defer freeString(options.path) + /* Run the c code command */ + return daosError(C.ddb_run_dtx_cmt_clear(&ctx.ctx, &options)) +} + +func ddbSmdSync(ctx *DdbContext, nvme_conf string, db_path string) error { + /* Set up the options */ + options := C.struct_smd_sync_options{} + options.nvme_conf = C.CString(nvme_conf) + defer freeString(options.nvme_conf) + options.db_path = C.CString(db_path) + defer freeString(options.db_path) + /* Run the c code command */ + return daosError(C.ddb_run_smd_sync(&ctx.ctx, &options)) +} + +func ddbVeaDump(ctx *DdbContext) error { + /* Run the c code command */ + return daosError(C.ddb_run_vea_dump(&ctx.ctx)) +} + +func ddbVeaUpdate(ctx *DdbContext, offset string, blk_cnt string) error { + /* Set up the options */ + options := C.struct_vea_update_options{} + options.offset = C.CString(offset) + defer freeString(options.offset) + options.blk_cnt = C.CString(blk_cnt) + defer freeString(options.blk_cnt) + /* Run the c code command */ + return daosError(C.ddb_run_vea_update(&ctx.ctx, &options)) +} + +func ddbDtxActCommit(ctx *DdbContext, path string, dtx_id string) error { + /* Set up the options */ + options := C.struct_dtx_act_commit_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.dtx_id = C.CString(dtx_id) + defer freeString(options.dtx_id) + /* Run the c code command */ + return daosError(C.ddb_run_dtx_act_commit(&ctx.ctx, &options)) +} + +func ddbDtxActAbort(ctx *DdbContext, path string, dtx_id string) error { + /* Set up the options */ + options := C.struct_dtx_act_abort_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.dtx_id = C.CString(dtx_id) + defer freeString(options.dtx_id) + /* Run the c code command */ + return 
daosError(C.ddb_run_dtx_act_abort(&ctx.ctx, &options)) +} diff --git a/src/control/cmd/ddb/ddb_commands.go b/src/control/cmd/ddb/ddb_commands.go new file mode 100644 index 00000000000..018818ea130 --- /dev/null +++ b/src/control/cmd/ddb/ddb_commands.go @@ -0,0 +1,300 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "github.com/desertbit/grumble" +) + +func addAppCommands(app *grumble.App, ctx *DdbContext) { + // Command: ls + app.AddCommand(&grumble.Command{ + Name: "ls", + Aliases: nil, + Help: "List containers, objects, dkeys, akeys, and values", + LongHelp: "", + HelpGroup: "vos", + Flags: func(f *grumble.Flags) { + f.Bool("r", "recursive", false, "Recursively list the contents of the path") + f.Bool("d", "details", false, "List more details of items in path") + }, + Args: func(a *grumble.Args) { + a.String("path", "Optional, list contents of the provided path", grumble.Default("")) + }, + Run: func(c *grumble.Context) error { + return ddbLs(ctx, c.Args.String("path"), c.Flags.Bool("recursive"), c.Flags.Bool("details")) + }, + Completer: nil, + }) + // Command: open + app.AddCommand(&grumble.Command{ + Name: "open", + Aliases: nil, + Help: "Opens the vos file at ", + LongHelp: `Opens the vos file at . The '-w' option allows for modifying the vos file +with the rm, load, commit_ilog, etc commands. The path should be an absolute path to the +pool shard. 
Part of the path is used to determine what the pool uuid is.`, + HelpGroup: "vos", + Flags: func(f *grumble.Flags) { + f.Bool("w", "write_mode", false, "Open the vos file in write mode.") + }, + Args: func(a *grumble.Args) { + a.String("path", "Path to the vos file to open.") + }, + Run: func(c *grumble.Context) error { + return ddbOpen(ctx, c.Args.String("path"), c.Flags.Bool("write_mode")) + }, + Completer: openCompleter, + }) + // Command: version + app.AddCommand(&grumble.Command{ + Name: "version", + Aliases: nil, + Help: "Print ddb version", + LongHelp: "", + HelpGroup: "", + Run: func(c *grumble.Context) error { + return ddbVersion(ctx) + }, + Completer: nil, + }) + // Command: close + app.AddCommand(&grumble.Command{ + Name: "close", + Aliases: nil, + Help: "Close the currently opened vos pool shard", + LongHelp: "", + HelpGroup: "vos", + Run: func(c *grumble.Context) error { + return ddbClose(ctx) + }, + Completer: nil, + }) + // Command: superblock_dump + app.AddCommand(&grumble.Command{ + Name: "superblock_dump", + Aliases: nil, + Help: "Dump the pool superblock information", + LongHelp: "", + HelpGroup: "vos", + Run: func(c *grumble.Context) error { + return ddbSuperblockDump(ctx) + }, + Completer: nil, + }) + // Command: value_dump + app.AddCommand(&grumble.Command{ + Name: "value_dump", + Aliases: nil, + Help: "Dump a value", + LongHelp: `Dump a value to the screen or file. The vos path should be a complete +path, including the akey and if the value is an array value it should include +the extent. 
If a path to a file was provided then the value will be written to +the file, else it will be printed to the screen.`, + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to dump.") + a.String("dst", "File path to dump the value to.", grumble.Default("")) + }, + Run: func(c *grumble.Context) error { + return ddbValueDump(ctx, c.Args.String("path"), c.Args.String("dst")) + }, + Completer: nil, + }) + // Command: rm + app.AddCommand(&grumble.Command{ + Name: "rm", + Aliases: nil, + Help: "Remove a branch of the VOS tree.", + LongHelp: `Remove a branch of the VOS tree. The branch can be anything from a container +and everything under it, to a single value.`, + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to remove.") + }, + Run: func(c *grumble.Context) error { + return ddbRm(ctx, c.Args.String("path")) + }, + Completer: nil, + }) + // Command: value_load + app.AddCommand(&grumble.Command{ + Name: "value_load", + Aliases: nil, + Help: "Load a value to a vos path. ", + LongHelp: `Load a value to a vos path. This can be used to update +the value of an existing key, or create a new key. The is a path to a +file on the file system. The is a vos tree path to a value where the +data will be loaded. 
If the path currently exists, then the destination +path must match the value type, meaning, if the value type is an array, then +the path must include the extent, otherwise, it must not.`, + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("src", "Source file path.") + a.String("dst", "Destination vos tree path to a value.") + }, + Run: func(c *grumble.Context) error { + return ddbValueLoad(ctx, c.Args.String("src"), c.Args.String("dst")) + }, + Completer: nil, + }) + // Command: ilog_dump + app.AddCommand(&grumble.Command{ + Name: "ilog_dump", + Aliases: nil, + Help: "Dump the ilog", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to an object, dkey, or akey.") + }, + Run: func(c *grumble.Context) error { + return ddbIlogDump(ctx, c.Args.String("path")) + }, + Completer: nil, + }) + // Command: ilog_commit + app.AddCommand(&grumble.Command{ + Name: "ilog_commit", + Aliases: nil, + Help: "Process the ilog", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to an object, dkey, or akey.") + }, + Run: func(c *grumble.Context) error { + return ddbIlogCommit(ctx, c.Args.String("path")) + }, + Completer: nil, + }) + // Command: ilog_clear + app.AddCommand(&grumble.Command{ + Name: "ilog_clear", + Aliases: nil, + Help: "Remove all the ilog entries", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to an object, dkey, or akey.") + }, + Run: func(c *grumble.Context) error { + return ddbIlogClear(ctx, c.Args.String("path")) + }, + Completer: nil, + }) + // Command: dtx_dump + app.AddCommand(&grumble.Command{ + Name: "dtx_dump", + Aliases: nil, + Help: "Dump the dtx tables", + LongHelp: "", + HelpGroup: "vos", + Flags: func(f *grumble.Flags) { + f.Bool("a", "active", false, "Only dump entries from the active table") + f.Bool("c", "committed", false, "Only dump entries from the committed table") + }, + Args: 
func(a *grumble.Args) { + a.String("path", "VOS tree path to a container.") + }, + Run: func(c *grumble.Context) error { + return ddbDtxDump(ctx, c.Args.String("path"), c.Flags.Bool("active"), c.Flags.Bool("committed")) + }, + Completer: nil, + }) + // Command: dtx_cmt_clear + app.AddCommand(&grumble.Command{ + Name: "dtx_cmt_clear", + Aliases: nil, + Help: "Clear the dtx committed table", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to a container.") + }, + Run: func(c *grumble.Context) error { + return ddbDtxCmtClear(ctx, c.Args.String("path")) + }, + Completer: nil, + }) + // Command: smd_sync + app.AddCommand(&grumble.Command{ + Name: "smd_sync", + Aliases: nil, + Help: "Restore the SMD file with backup from blob", + LongHelp: "", + HelpGroup: "smd", + Args: func(a *grumble.Args) { + a.String("nvme_conf", "Path to the nvme conf file. (default /mnt/daos/daos_nvme.conf)", grumble.Default("")) + a.String("db_path", "Path to the vos db. 
(default /mnt/daos)", grumble.Default("")) + }, + Run: func(c *grumble.Context) error { + return ddbSmdSync(ctx, c.Args.String("nvme_conf"), c.Args.String("db_path")) + }, + Completer: nil, + }) + // Command: vea_dump + app.AddCommand(&grumble.Command{ + Name: "vea_dump", + Aliases: nil, + Help: "Dump information from the vea about free regions", + LongHelp: "", + HelpGroup: "vos", + Run: func(c *grumble.Context) error { + return ddbVeaDump(ctx) + }, + Completer: nil, + }) + // Command: vea_update + app.AddCommand(&grumble.Command{ + Name: "vea_update", + Aliases: nil, + Help: "Alter the VEA tree to mark a region as free.", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("offset", "Block offset of the region to mark free.") + a.String("blk_cnt", "Total blocks of the region to mark free.") + }, + Run: func(c *grumble.Context) error { + return ddbVeaUpdate(ctx, c.Args.String("offset"), c.Args.String("blk_cnt")) + }, + Completer: nil, + }) + // Command: dtx_act_commit + app.AddCommand(&grumble.Command{ + Name: "dtx_act_commit", + Aliases: nil, + Help: "Mark the active dtx entry as committed", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to a container.") + a.String("dtx_id", "DTX id of the entry to commit. ") + }, + Run: func(c *grumble.Context) error { + return ddbDtxActCommit(ctx, c.Args.String("path"), c.Args.String("dtx_id")) + }, + Completer: nil, + }) + // Command: dtx_act_abort + app.AddCommand(&grumble.Command{ + Name: "dtx_act_abort", + Aliases: nil, + Help: "Mark the active dtx entry as aborted", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to a container.") + a.String("dtx_id", "DTX id of the entry to abort. 
") + }, + Run: func(c *grumble.Context) error { + return ddbDtxActAbort(ctx, c.Args.String("path"), c.Args.String("dtx_id")) + }, + Completer: nil, + }) +} diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go new file mode 100644 index 00000000000..a74c9493803 --- /dev/null +++ b/src/control/cmd/ddb/main.go @@ -0,0 +1,275 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "bufio" + "os" + "path" + "path/filepath" + "runtime/debug" + "sort" + "strings" + + "github.com/desertbit/grumble" + "github.com/jessevdk/go-flags" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/logging" +) + +func exitWithError(log logging.Logger, err error) { + cmdName := path.Base(os.Args[0]) + log.Errorf("%s: %v", cmdName, err) + if fault.HasResolution(err) { + log.Errorf("%s: %s", cmdName, fault.ShowResolutionFor(err)) + } + os.Exit(1) +} + +type cliOptions struct { + Debug bool `long:"debug" description:"enable debug output"` + WriteMode bool `long:"write_mode" short:"w" description:"Open the vos file in write mode."` + CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` + Version bool `short:"v" long:"version" description:"Show version"` + Args struct { + VosPath vosPathStr `positional-arg-name:"vos_file_path"` + RunCmd ddbCmdStr `positional-arg-name:"ddb_command"` + RunCmdArgs []string `positional-arg-name:"ddb_command_args"` + } `positional-args:"yes"` +} + +type vosPathStr string + +func (pathStr vosPathStr) Complete(match string) (comps []flags.Completion) { + if match == "" || match == "/" { + match = defMntPrefix + } + for _, comp := range listDir(match) { + comps = append(comps, flags.Completion{Item: comp}) + } + sort.Slice(comps, func(i, j int) bool { return comps[i].Item < comps[j].Item 
}) + + return +} + +type ddbCmdStr string + +func (cmdStr ddbCmdStr) Complete(match string) (comps []flags.Completion) { + // hack to get at command names + ctx, cleanup, err := InitDdb() + if err != nil { + return + } + defer cleanup() + + app := createGrumbleApp(ctx) + for _, cmd := range app.Commands().All() { + if match == "" || strings.HasPrefix(cmd.Name, match) { + comps = append(comps, flags.Completion{Item: cmd.Name}) + } + } + sort.Slice(comps, func(i, j int) bool { return comps[i].Item < comps[j].Item }) + + return +} + +func (cmdStr *ddbCmdStr) UnmarshalFlag(fv string) error { + *cmdStr = ddbCmdStr(fv) + return nil +} + +func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { + file, err := os.Open(fileName) + if err != nil { + return errors.Wrapf(err, "Error opening file: %s", fileName) + + } + defer func() { + err = file.Close() + if err != nil { + log.Errorf("Error closing %s: %s\n", fileName, err) + } + }() + + log.Debugf("Running commands in: %s\n", fileName) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + fileCmd := scanner.Text() + log.Debugf("Running Command: %s\n", fileCmd) + err := runCmdStr(app, fileCmd) + if err != nil { + return errors.Wrapf(err, "Failed running command %q", fileCmd) + } + } + + return nil +} + +func parseOpts(args []string, opts *cliOptions, log *logging.LeveledLogger) error { + p := flags.NewParser(opts, flags.HelpFlag|flags.IgnoreUnknown) + p.Name = "ddb" + p.Usage = "[OPTIONS]" + p.ShortDescription = "daos debug tool" + p.LongDescription = `The DAOS Debug Tool (ddb) allows a user to navigate through and modify +a file in the VOS format. It offers both a command line and interactive +shell mode. If neither a single command or '-f' option is provided, then +the tool will run in interactive mode. In order to modify the VOS file, +the '-w' option must be included. If supplied, the VOS file supplied in +the first positional parameter will be opened before commands are executed. 
+ +Many of the commands take a vos tree path. The format for this path +is [cont]/[obj]/[dkey]/[akey]/[extent]. +- cont - the full container uuid. +- obj - the object id. +- keys (akey, dkey) - there are multiple types of keys + -- string keys are simply the string value. If the size of the + key is greater than strlen(key), then the size is included at + the end of the string value. Example: 'akey{5}' is the key: akey + with a null terminator at the end. + -- number keys are formatted as '{[type]: NNN}' where type is + 'uint8, uint16, uint32, or uint64'. NNN can be a decimal or + hex number. Example: '{uint32: 123456}' + -- binary keys are formatted as '{bin: 0xHHH}' where HHH is the hex + representation of the binary key. Example: '{bin: 0x1a2b}' +- extent for array values - in the format {lo-hi}. + +To make it easier to navigate the tree, indexes can be +used instead of the path part. The index is in the format [i]. Indexes +and actual path values can be used together + +Example Paths: +/3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023] +[0]/[1]/[2]/[1]/[9] +/[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023] +` + + // Set the traceback level such that a crash results in + // a coredump (when ulimit -c is set appropriately). 
+ debug.SetTraceback("crash") + + if _, err := p.ParseArgs(args); err != nil { + return err + } + + if opts.Version { + log.Infof("ddb version %s", build.DaosVersion) + return nil + } + + if opts.Debug { + log.WithLogLevel(logging.LogLevelDebug) + log.Debug("debug output enabled") + } + + ctx, cleanup, err := InitDdb() + if err != nil { + return errors.Wrap(err, "Error initializing the DDB Context") + } + defer cleanup() + app := createGrumbleApp(ctx) + + if opts.Args.VosPath != "" { + log.Debugf("Connect to path: %s\n", opts.Args.VosPath) + if err := ddbOpen(ctx, string(opts.Args.VosPath), opts.WriteMode); err != nil { + return errors.Wrapf(err, "Error opening path: %s", opts.Args.VosPath) + } + } + + if opts.Args.RunCmd != "" && opts.CmdFile != "" { + return errors.New("Cannot use both command file and a command string") + } + + if opts.Args.RunCmd != "" || opts.CmdFile != "" { + // Non-interactive mode + if opts.Args.RunCmd != "" { + err := runCmdStr(app, string(opts.Args.RunCmd), opts.Args.RunCmdArgs...) 
+ if err != nil { + log.Errorf("Error running command %s\n", string(opts.Args.RunCmd)) + } + } else { + err := runFileCmds(log, app, opts.CmdFile) + if err != nil { + log.Error("Error running command file\n") + } + } + + if ddbPoolIsOpen(ctx) { + err := ddbClose(ctx) + if err != nil { + log.Error("Error closing pool\n") + } + } + return err + } + + // Interactive mode + // Print the version upon entry + log.Infof("ddb version %s", build.DaosVersion) + // app.Run() uses the os.Args so need to clear them before running + os.Args = args + result := app.Run() + // make sure pool is closed + if ddbPoolIsOpen(ctx) { + err := ddbClose(ctx) + if err != nil { + log.Error("Error closing pool\n") + } + } + return result +} + +func main() { + var opts cliOptions + log := logging.NewCommandLineLogger() + + if err := parseOpts(os.Args[1:], &opts, log); err != nil { + if fe, ok := errors.Cause(err).(*flags.Error); ok && fe.Type == flags.ErrHelp { + log.Info(fe.Error()) + os.Exit(0) + } + exitWithError(log, err) + } +} + +func createGrumbleApp(ctx *DdbContext) *grumble.App { + homedir, err := os.UserHomeDir() + if err != nil { + homedir = "/tmp" + } + var app = grumble.New(&grumble.Config{ + Name: "ddb", + Flags: nil, + HistoryFile: filepath.Join(homedir, ".ddb_history"), + NoColor: false, + Prompt: "ddb: ", + }) + + addAppCommands(app, ctx) + + // Add the quit command. grumble also includes a builtin exit command + app.AddCommand(&grumble.Command{ + Name: "quit", + Aliases: []string{"q"}, + Help: "exit the shell", + LongHelp: "", + HelpGroup: "", + Run: func(c *grumble.Context) error { + c.Stop() + return nil + }, + Completer: nil, + }) + return app +} + +// Run the command in 'run' using the grumble app. 
shlex is used to parse the string into an argv/c format +func runCmdStr(app *grumble.App, cmd string, args ...string) error { + return app.RunCommand(append([]string{cmd}, args...)) +} diff --git a/src/control/cmd/dmg/check.go b/src/control/cmd/dmg/check.go new file mode 100644 index 00000000000..3b0144f9079 --- /dev/null +++ b/src/control/cmd/dmg/check.go @@ -0,0 +1,406 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "bytes" + "context" + "fmt" + "strconv" + "strings" + + "github.com/jessevdk/go-flags" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/cmd/dmg/pretty" + "github.com/daos-stack/daos/src/control/common/cmdutil" + "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/ui" +) + +type checkCmdRoot struct { + Enable checkEnableCmd `command:"enable" description:"Enable system checker"` + Disable checkDisableCmd `command:"disable" description:"Disable system checker"` + Start checkStartCmd `command:"start" description:"Start a system check"` + Stop checkStopCmd `command:"stop" description:"Stop a system check"` + Query checkQueryCmd `command:"query" description:"Query a system check"` + SetPolicy checkSetPolicyCmd `command:"set-policy" description:"Set system checker policies"` + GetPolicy checkGetPolicyCmd `command:"get-policy" description:"Get system checker policies"` + Repair checkRepairCmd `command:"repair" description:"Repair a reported system check problem"` +} + +type poolIDSet []PoolID + +func (p poolIDSet) List() (ids []string) { + ids = make([]string, len(p)) + + for i, id := range p { + ids[i] = id.String() + } + + return +} + +type checkCmdBase struct { + cmdutil.JSONOutputCmd + cmdutil.LogCmd + cfgCmd + ctlInvokerCmd +} + +func (c *checkCmdBase) Execute(_ []string) error { + return errors.New("not implemented") +} + +type checkPoolCmdBase struct { + checkCmdBase + + Args struct { + Pools 
poolIDSet `positional-arg-name:"[pool name or UUID [pool name or UUID]] "` + } `positional-args:"yes"` +} + +type checkEnableCmd struct { + checkCmdBase +} + +func (cmd *checkEnableCmd) Execute([]string) error { + req := new(control.SystemCheckEnableReq) + if err := control.SystemCheckEnable(context.Background(), cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("system checker enabled") + + return nil +} + +type checkDisableCmd struct { + checkCmdBase +} + +func (cmd *checkDisableCmd) Execute([]string) error { + req := new(control.SystemCheckDisableReq) + if err := control.SystemCheckDisable(context.Background(), cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("system checker disabled") + + return nil +} + +type setRepPolFlag struct { + ui.SetPropertiesFlag + + SetPolicies []*control.SystemCheckPolicy +} + +func (f *setRepPolFlag) UnmarshalFlag(fv string) error { + var keys []string + for _, class := range control.CheckerPolicyClasses() { + keys = append(keys, class.String()) + } + f.SettableKeys(keys...) 
+ + if err := f.SetPropertiesFlag.UnmarshalFlag(fv); err != nil { + return err + } + + f.SetPolicies = make([]*control.SystemCheckPolicy, 0, len(f.ParsedProps)) + for class, action := range f.ParsedProps { + policy, err := control.NewSystemCheckPolicy(class, action) + if err != nil { + return err + } + f.SetPolicies = append(f.SetPolicies, policy) + } + + return nil +} + +func (f *setRepPolFlag) Complete(match string) []flags.Completion { + actions := control.CheckerPolicyActions() + actKeys := make([]string, len(actions)) + for i, act := range actions { + actKeys[i] = act.String() + } + comps := make(ui.CompletionMap) + for _, class := range control.CheckerPolicyClasses() { + comps[class.String()] = actKeys + } + f.SetCompletions(comps) + + return f.SetPropertiesFlag.Complete(match) +} + +type checkStartCmd struct { + checkPoolCmdBase + + DryRun bool `short:"n" long:"dry-run" description:"Scan only; do not initiate repairs."` + Reset bool `short:"r" long:"reset" description:"Reset the system check state."` + Failout ui.EnabledFlag `short:"f" long:"failout" description:"Stop on failure." choice:"on" choice:"off"` + Auto ui.EnabledFlag `short:"a" long:"auto" description:"Attempt to automatically repair problems." 
choice:"on" choice:"off"` + FindOrphans bool `short:"O" long:"find-orphans" description:"Find orphaned pools."` + Policies setRepPolFlag `short:"p" long:"policies" description:"Set repair policies."` +} + +func (cmd *checkStartCmd) Execute(_ []string) error { + ctx := context.Background() + + req := new(control.SystemCheckStartReq) + req.Uuids = cmd.Args.Pools.List() + + if cmd.DryRun { + req.Flags |= uint32(control.SystemCheckFlagDryRun) + } + if cmd.Reset { + req.Flags |= uint32(control.SystemCheckFlagReset) + } + if cmd.Failout.Set { + if cmd.Failout.Enabled { + req.Flags |= uint32(control.SystemCheckFlagFailout) + } else { + req.Flags |= uint32(control.SystemCheckFlagDisableFailout) + } + } + if cmd.Auto.Set { + if cmd.Auto.Enabled { + req.Flags |= uint32(control.SystemCheckFlagAuto) + } else { + req.Flags |= uint32(control.SystemCheckFlagDisableAuto) + } + } + if cmd.FindOrphans { + req.Flags |= uint32(control.SystemCheckFlagFindOrphans) + } + req.Policies = cmd.Policies.SetPolicies + + if err := control.SystemCheckStart(ctx, cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("system checker started") + + return nil +} + +type checkStopCmd struct { + checkPoolCmdBase +} + +func (cmd *checkStopCmd) Execute(_ []string) error { + ctx := context.Background() + + req := new(control.SystemCheckStopReq) + req.Uuids = cmd.Args.Pools.List() + + if err := control.SystemCheckStop(ctx, cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("system checker stopped") + + return nil +} + +type checkQueryCmd struct { + checkPoolCmdBase + + Verbose bool `short:"v" long:"verbose" description:"Show more detailed information."` +} + +func (cmd *checkQueryCmd) Execute(_ []string) error { + ctx := context.Background() + + req := new(control.SystemCheckQueryReq) + req.Uuids = cmd.Args.Pools.List() + + resp, err := control.SystemCheckQuery(ctx, cmd.ctlInvoker, req) + if cmd.JSONOutputEnabled() { + return cmd.OutputJSON(resp, nil) + } + if err != nil { + return 
err + } + + var buf bytes.Buffer + pretty.PrintCheckQueryResp(&buf, resp, cmd.Verbose) + cmd.Info(buf.String()) + + return nil +} + +type checkSetPolicyCmd struct { + checkCmdBase + + ResetToDefaults bool `short:"d" long:"reset-defaults" description:"Set all policies to their default action."` + AllInteractive bool `short:"a" long:"all-interactive" description:"Set all policies to interactive."` + Args struct { + Policies setRepPolFlag `description:"Repair policies (required unless --all-interactive is specified)"` + } `positional-args:"yes"` +} + +func (cmd *checkSetPolicyCmd) Execute(_ []string) error { + ctx := context.Background() + + req := &control.SystemCheckSetPolicyReq{ + ResetToDefaults: cmd.ResetToDefaults, + AllInteractive: cmd.AllInteractive, + Policies: cmd.Args.Policies.SetPolicies, + } + if err := control.SystemCheckSetPolicy(ctx, cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("system checker policies updated") + + return nil +} + +type getRepPolFlag struct { + ui.GetPropertiesFlag + + ReqClasses []control.SystemCheckFindingClass +} + +func (f *getRepPolFlag) UnmarshalFlag(fv string) error { + var keys []string + for _, class := range control.CheckerPolicyClasses() { + keys = append(keys, class.String()) + } + f.GettableKeys(keys...) 
+ + if err := f.GetPropertiesFlag.UnmarshalFlag(fv); err != nil { + return err + } + + i := 0 + f.ReqClasses = make([]control.SystemCheckFindingClass, len(f.ParsedProps)) + for _, class := range f.ParsedProps.ToSlice() { + if err := f.ReqClasses[i].FromString(class); err != nil { + return err + } + i++ + } + + return nil +} + +func (f *getRepPolFlag) Complete(match string) []flags.Completion { + comps := make(ui.CompletionMap) + for _, class := range control.CheckerPolicyClasses() { + comps[class.String()] = nil + } + f.SetCompletions(comps) + + return f.GetPropertiesFlag.Complete(match) +} + +type checkGetPolicyCmd struct { + checkCmdBase + + LastUsed bool `short:"L" long:"last" description:"Fetch the last policy used by the checker."` + Args struct { + Classes getRepPolFlag `description:"Inconsistency class names"` + } `positional-args:"yes"` +} + +func (cmd *checkGetPolicyCmd) Execute(_ []string) error { + ctx := context.Background() + + req := new(control.SystemCheckGetPolicyReq) + req.LastUsed = cmd.LastUsed + req.SetClasses(cmd.Args.Classes.ReqClasses) + resp, err := control.SystemCheckGetPolicy(ctx, cmd.ctlInvoker, req) + if cmd.JSONOutputEnabled() { + return cmd.OutputJSON(resp, nil) + } + if err != nil { + return err + } + + var buf bytes.Buffer + pretty.PrintCheckerPolicies(&buf, resp.CheckerFlags, resp.Policies...) 
+ cmd.Info(buf.String()) + + return nil +} + +type repairSeqNum uint64 + +func (r repairSeqNum) String() string { + return fmt.Sprintf("0x%x", uint64(r)) +} + +func (r *repairSeqNum) UnmarshalFlag(value string) error { + var val uint64 + var err error + if strings.HasPrefix(value, "0x") { + cleaned := strings.Replace(value, "0x", "", -1) + val, err = strconv.ParseUint(cleaned, 16, 64) + } else { + val, err = strconv.ParseUint(value, 10, 64) + } + + if err != nil { + return err + } + + *r = repairSeqNum(val) + return nil +} + +type checkRepairCmd struct { + checkCmdBase + + ForAll bool `short:"f" long:"for-all" description:"Take the same action for all inconsistencies with the same class."` + + Args struct { + SeqNum repairSeqNum `positional-arg-name:"[seq-num]" required:"1"` + SelectedAction int `positional-arg-name:"[action]" required:"1"` + } `positional-args:"yes"` +} + +func (cmd *checkRepairCmd) Execute(_ []string) error { + ctx := context.Background() + + qReq := new(control.SystemCheckQueryReq) + qReq.Seqs = []uint64{uint64(cmd.Args.SeqNum)} + qResp, err := control.SystemCheckQuery(ctx, cmd.ctlInvoker, qReq) + if err != nil { + return err + } + + if len(qResp.Reports) == 0 { + return errors.Errorf("no report found for seq %s", cmd.Args.SeqNum) + } + + report := qResp.Reports[0] + if !report.IsInteractive() { + return errors.Errorf("finding %s is already resolved: %s", cmd.Args.SeqNum, report.Resolution()) + } + choices := report.RepairChoices() + if cmd.Args.SelectedAction < 0 || cmd.Args.SelectedAction >= len(choices) { + return errors.Errorf("invalid action %d for seq %s", cmd.Args.SelectedAction, cmd.Args.SeqNum) + } + + req := new(control.SystemCheckRepairReq) + req.Seq = uint64(cmd.Args.SeqNum) + req.ForAll = cmd.ForAll + if err := req.SetAction(int32(choices[cmd.Args.SelectedAction].Action)); err != nil { + return err + } + + if err := control.SystemCheckRepair(ctx, cmd.ctlInvoker, req); err != nil { + return err + } + + cmd.Info("Repair request sent") 
+ + return nil +} diff --git a/src/control/cmd/dmg/check_test.go b/src/control/cmd/dmg/check_test.go new file mode 100644 index 00000000000..cdc390ce019 --- /dev/null +++ b/src/control/cmd/dmg/check_test.go @@ -0,0 +1,69 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "testing" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/lib/control" + "github.com/pkg/errors" +) + +func TestCheckGetPolicyCommand(t *testing.T) { + runCmdTests(t, []cmdTest{ + { + "Get policy with no arguments", + "check get-policy", + printRequest(t, &control.SystemCheckGetPolicyReq{CheckGetPolicyReq: mgmtpb.CheckGetPolicyReq{Sys: "daos_server-unset"}}), + nil, + }, + { + "Get policy for one class", + "check get-policy POOL_BAD_LABEL", + printRequest(t, &control.SystemCheckGetPolicyReq{ + CheckGetPolicyReq: mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server-unset", + Classes: []chkpb.CheckInconsistClass{chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL}, + }, + }), + nil, + }, + { + "Get policy for multiple classes", + "check get-policy POOL_BAD_LABEL,CONT_BAD_LABEL", + printRequest(t, &control.SystemCheckGetPolicyReq{ + CheckGetPolicyReq: mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server-unset", + Classes: []chkpb.CheckInconsistClass{ + chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + }, + }, + }), + nil, + }, + { + "Get policy for invalid class", + "check get-policy garbage", + "", + errors.New("gettable property"), + }, + { + "Get policy latest used", + "check get-policy --last", + printRequest(t, &control.SystemCheckGetPolicyReq{ + CheckGetPolicyReq: mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server-unset", + LastUsed: true, + }, + }), + nil, + }, + }) +} diff --git a/src/control/cmd/dmg/command_test.go b/src/control/cmd/dmg/command_test.go index 
262c8076f2f..b587a6bab05 100644 --- a/src/control/cmd/dmg/command_test.go +++ b/src/control/cmd/dmg/command_test.go @@ -165,6 +165,22 @@ func (bci *bridgeConnInvoker) InvokeUnaryRPC(ctx context.Context, uReq control.U resp = control.MockMSResponse("", nil, &mgmtpb.PoolExtendResp{}) case *control.PoolReintegrateReq: resp = control.MockMSResponse("", nil, &mgmtpb.PoolReintegrateResp{}) + case *control.SystemCheckEnableReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) + case *control.SystemCheckDisableReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) + case *control.SystemCheckStartReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) + case *control.SystemCheckStopReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) + case *control.SystemCheckQueryReq: + resp = control.MockMSResponse("", nil, &mgmtpb.CheckQueryResp{}) + case *control.SystemCheckGetPolicyReq: + resp = control.MockMSResponse("", nil, &mgmtpb.CheckGetPolicyResp{}) + case *control.SystemCheckSetPolicyReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) + case *control.SystemCheckRepairReq: + resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) case *control.SystemSetAttrReq: resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{}) case *control.SystemGetAttrReq: diff --git a/src/control/cmd/dmg/fi.go b/src/control/cmd/dmg/fi.go new file mode 100644 index 00000000000..222d33c9308 --- /dev/null +++ b/src/control/cmd/dmg/fi.go @@ -0,0 +1,234 @@ +// +// (C) Copyright 2019-2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build fault_injection +// +build fault_injection + +package main + +import ( + "context" + "encoding/json" + "io/ioutil" + "math/rand" + "strings" + "time" + + "github.com/google/uuid" + "github.com/jessevdk/go-flags" + "github.com/pkg/errors" + "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/common/cmdutil" + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/ui" + "github.com/daos-stack/daos/src/control/system/checker" +) + +type faultsCmdRoot struct { + Faults faultCmd `command:"faults" description:"Inject system fault"` +} + +type faultCmd struct { + AddCheckerReport addCheckerReportCmd `command:"add-checker-report" description:"Add a system checker report"` + MgmtSvcFault mgmtSvcFaultCmd `command:"mgmt-svc" alias:"ms" description:"Inject management service fault"` + PoolSvcFault poolSvcFaultCmd `command:"pool-svc" alias:"ps" description:"Inject pool service fault"` +} + +type chkRptCls struct { + class control.SystemCheckFindingClass +} + +func (c chkRptCls) Complete(match string) (comps []flags.Completion) { + for _, cls := range control.CheckerPolicyClasses() { + if strings.HasPrefix(cls.String(), match) { + comps = append(comps, flags.Completion{Item: cls.String()}) + } + } + return +} + +func (c *chkRptCls) UnmarshalFlag(value string) error { + return c.class.FromString(value) +} + +func (c chkRptCls) ToProto() chkpb.CheckInconsistClass { + return chkpb.CheckInconsistClass(c.class) +} + +type addCheckerReportCmd struct { + cmdutil.JSONOutputCmd + baseCmd + ctlInvokerCmd + + File string `short:"f" long:"file" description:"File containing checker report in JSON format"` + Class chkRptCls `short:"c" 
long:"class" description:"Checker report class (canned reports)"` +} + +func (cmd *addCheckerReportCmd) Execute(_ []string) (errOut error) { + defer func() { + errOut = errors.Wrap(errOut, "add checker finding") + }() + + var rpt *chkpb.CheckReport + if cmd.File != "" { + buf, err := ioutil.ReadFile(cmd.File) + if err != nil { + return errors.Wrapf(err, "failed to open file %s", cmd.File) + } + rpt = new(chkpb.CheckReport) + if err := json.Unmarshal(buf, rpt); err != nil { + return errors.Wrapf(err, "failed to parse file %s", cmd.File) + } + } else { + rand.Seed(time.Now().UnixNano()) + + cls := cmd.Class.ToProto() + // Define some canned reports based on class. These can be used + // for prototyping and testing. For more control, define a report + // in JSON format and load it with the --file option. + switch cls { + case chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL: + rpt = &chkpb.CheckReport{ + Seq: rand.Uint64(), + Class: cls, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: uuid.New().String(), + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{"ms-label", "ps-label"}, + } + default: + return errors.Errorf("no canned report for class: %s", cls) + } + + // For canned reports, annotate the report for nice messages. + // For reports loaded from file, don't annotate them, just use them as-is. 
+ f := checker.AnnotateFinding(checker.NewFinding(rpt)) + rpt = &f.CheckReport + } + + if rpt.Class == chkpb.CheckInconsistClass_CIC_NONE { + return errors.New("class must be set") + } + + ctx := context.Background() + resp, err := control.InvokeFaultRPC(ctx, cmd.ctlInvoker, + func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + cmd.Debugf("injecting checker report: %+v", rpt) + return mgmtpb.NewMgmtSvcClient(conn).FaultInjectReport(ctx, rpt) + }, + ) + + if cmd.JSONOutputEnabled() { + // Pass err through so JSON consumers see a failed RPC instead of a silent success. + return cmd.OutputJSON(resp, err) + } + + if err != nil { + return err + } + + cmd.Info("Checker report added") + + return nil +} + +type labelFlag struct { + ui.LabelOrUUIDFlag +} + +func (f *labelFlag) UnmarshalFlag(value string) error { + if err := f.LabelOrUUIDFlag.UnmarshalFlag(value); err != nil { + return errors.Wrap(err, "invalid label") + } + + if f.HasUUID() { + return errors.New("UUID is not a valid input") + } + + return nil +} + +type poolFaultCmd struct { + poolCmd + + SvcList ui.RankSetFlag `short:"s" long:"svcl" description:"List of pool service ranks"` + Label labelFlag `short:"l" long:"label" description:"Pool service label"` + + Args struct { + Class chkRptCls `positional-arg-name:"" description:"Checker report class" required:"1"` + } `positional-args:"yes"` +} + +type mgmtSvcFaultCmd struct { + Pool mgmtSvcPoolFaultCmd `command:"pool" description:"Modify a pool service entry in the MS DB"` +} + +type mgmtSvcPoolFaultCmd struct { + poolFaultCmd +} + +func (cmd *mgmtSvcPoolFaultCmd) Execute([]string) (errOut error) { + resp, err := control.InvokeFaultRPC(context.Background(), cmd.ctlInvoker, + func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).FaultInjectMgmtPoolFault(ctx, + &chkpb.Fault{ + Class: cmd.Args.Class.ToProto(), + Strings: []string{cmd.PoolID().String(), cmd.Label.Label}, + Uints: ranklist.RanksToUint32(cmd.SvcList.Ranks()), + }, + ) + }, + ) + + if 
cmd.JSONOutputEnabled() { + return cmd.OutputJSON(resp, nil) + } + + if err != nil { + return err + } + + cmd.Info("Mgmt service fault injected") + + return nil +} + +type poolSvcFaultCmd struct { + poolFaultCmd +} + +func (cmd *poolSvcFaultCmd) Execute([]string) (errOut error) { + resp, err := control.InvokeFaultRPC(context.Background(), cmd.ctlInvoker, + func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).FaultInjectPoolFault(ctx, + &chkpb.Fault{ + Class: cmd.Args.Class.ToProto(), + Strings: []string{cmd.PoolID().String(), cmd.Label.Label}, + Uints: ranklist.RanksToUint32(cmd.SvcList.Ranks()), + }, + ) + }, + ) + + if cmd.JSONOutputEnabled() { + return cmd.OutputJSON(resp, nil) + } + + if err != nil { + return err + } + + cmd.Info("Pool service fault injected") + + return nil +} diff --git a/src/control/cmd/dmg/fi_disabled.go b/src/control/cmd/dmg/fi_disabled.go new file mode 100644 index 00000000000..f52d90ccb90 --- /dev/null +++ b/src/control/cmd/dmg/fi_disabled.go @@ -0,0 +1,11 @@ +// +// (C) Copyright 2019-2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build !fault_injection +// +build !fault_injection + +package main + +type faultsCmdRoot struct{} diff --git a/src/control/cmd/dmg/json_test.go b/src/control/cmd/dmg/json_test.go index b4b14b8f19d..a332dec368b 100644 --- a/src/control/cmd/dmg/json_test.go +++ b/src/control/cmd/dmg/json_test.go @@ -67,7 +67,7 @@ func TestDmg_JsonOutput(t *testing.T) { testArgs := append([]string{"-i", "--json"}, args...) 
switch strings.Join(args, " ") { case "version", "telemetry config", "telemetry run", "config generate", - "manpage", "system set-prop", "support collect-log": + "manpage", "system set-prop", "support collect-log", "check repair": return case "storage nvme-rebind": testArgs = append(testArgs, "-l", "foo.com", "-a", @@ -109,6 +109,8 @@ func TestDmg_JsonOutput(t *testing.T) { return // These commands query via http directly case "system cleanup": testArgs = append(testArgs, "hostname") + case "check set-policy": + testArgs = append(testArgs, "POOL_BAD_LABEL:IGNORE") case "system set-attr": testArgs = append(testArgs, "foo:bar") case "system del-attr": diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index a15c65867a4..ed7362eda2b 100644 --- a/src/control/cmd/dmg/main.go +++ b/src/control/cmd/dmg/main.go @@ -127,8 +127,10 @@ type cliOptions struct { Cont ContCmd `command:"container" alias:"cont" description:"Perform tasks related to DAOS containers"` Version versionCmd `command:"version" description:"Print dmg version"` Telemetry telemCmd `command:"telemetry" alias:"telem" description:"Perform telemetry operations"` - firmwareOption // build with tag "firmware" to enable + Check checkCmdRoot `command:"check" description:"Check system health"` ManPage cmdutil.ManCmd `command:"manpage" hidden:"true"` + faultsCmdRoot // compiled out for release builds + firmwareOption // build with tag "firmware" to enable } type versionCmd struct { diff --git a/src/control/cmd/dmg/pretty/check.go b/src/control/cmd/dmg/pretty/check.go new file mode 100644 index 00000000000..b963293ec97 --- /dev/null +++ b/src/control/cmd/dmg/pretty/check.go @@ -0,0 +1,247 @@ +// +// (C) Copyright 2020-2023 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package pretty + +import ( + "fmt" + "io" + "sort" + + "github.com/dustin/go-humanize/english" + + "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/txtfmt" +) + +const ( + idLabel = "ID" + classLabel = "Class" + poolLabel = "Pool" + contLabel = "Cont" + resLabel = "Resolution" + repairLabel = "Repair Options" +) + +// PrintCheckerPolicies displays a two-column table of checker policy classes and actions. +func PrintCheckerPolicies(out io.Writer, flags control.SystemCheckFlags, policies ...*control.SystemCheckPolicy) { + fmt.Fprintf(out, "Checker flags: %s\n\n", flags) + + nameTitle := "Inconsistency Class" + valueTitle := "Repair Action" + table := []txtfmt.TableRow{} + for _, policy := range policies { + if policy == nil { + continue + } + row := txtfmt.TableRow{} + row[nameTitle] = policy.FindingClass.String() + row[valueTitle] = policy.RepairAction.String() + table = append(table, row) + } + + tf := txtfmt.NewTableFormatter(nameTitle, valueTitle) + tf.InitWriter(out) + tf.Format(table) +} + +func countResultPools(resp *control.SystemCheckQueryResp) int { + if resp == nil { + return 0 + } + + poolMap := make(map[string]struct{}) + for _, pool := range resp.Pools { + // Don't include pools that were not checked. + if pool.Unchecked() { + continue + } + poolMap[pool.UUID] = struct{}{} + } + for _, report := range resp.Reports { + if report.IsRemovedPool() && report.PoolUuid != "" { + poolMap[report.PoolUuid] = struct{}{} + } + } + + return len(poolMap) +} + +// PrintCheckQueryResp prints the checker results to the console. 
+func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verbose bool) { + fmt.Fprintln(out, "DAOS System Checker Info") + if resp == nil { + fmt.Fprintln(out, " No results found.") + return + } + + statusMsg := fmt.Sprintf("Current status: %s", resp.Status) + if resp.Status > control.SystemCheckStatusInit && resp.Status < control.SystemCheckStatusCompleted { + statusMsg += fmt.Sprintf(" (started at: %s)", common.FormatTime(resp.StartTime)) + } + fmt.Fprintf(out, " %s\n", statusMsg) + fmt.Fprintf(out, " Current phase: %s (%s)\n", resp.ScanPhase, resp.ScanPhase.Description()) + + // Toggle this output based on the status. If the checker is still running, we + // should show the number of pools being checked. If the checker has completed, + // we should show the number of unique pools found in the reports. + action := "Checking" + poolCount := countResultPools(resp) + if resp.Status == control.SystemCheckStatusCompleted { + action = "Checked" + } + if poolCount > 0 { + fmt.Fprintf(out, " %s %s\n", action, english.Plural(poolCount, "pool", "")) + } + + if len(resp.Pools) > 0 && verbose { + pools := make([]*control.SystemCheckPoolInfo, 0, len(resp.Pools)) + for _, pool := range resp.Pools { + pools = append(pools, pool) + } + sort.Slice(pools, func(i, j int) bool { + return pools[i].UUID < pools[j].UUID + }) + fmt.Fprintln(out, "\nPer-Pool Checker Info:") + for _, pool := range pools { + fmt.Fprintf(out, " %+v\n", pool) + } + } + + fmt.Fprintln(out) + if len(resp.Reports) == 0 { + fmt.Fprintln(out, "No reports to display.") + return + } + + fmt.Fprintln(out, "Inconsistency Reports:") + if verbose { + printInconsistencyReportsVerbose(out, resp) + } else { + printInconsistencyReportsTable(out, resp) + } +} + +func printInconsistencyReportsTable(out io.Writer, resp *control.SystemCheckQueryResp) { + resolvedTable := []txtfmt.TableRow{} + actionTable := []txtfmt.TableRow{} + resolvedHasCont := false + actionHasCont := false + for _, report := range 
resp.Reports { + tr := txtfmt.TableRow{} + + tr[idLabel] = fmt.Sprintf("0x%x", report.Seq) + tr[classLabel] = control.SystemCheckFindingClass(report.Class).String() + tr[poolLabel] = checkerPoolID(report, false) + + if report.ContUuid != "" { + if report.IsInteractive() { + actionHasCont = true + } else { + resolvedHasCont = true + } + tr[contLabel] = checkerContID(report, false) + } + + if report.IsInteractive() { + choices := report.RepairChoices() + for idx, choice := range choices { + if idx != 0 { + // choices appear on multiple lines + actionTable = append(actionTable, tr) + tr = txtfmt.TableRow{ + idLabel: "", + classLabel: "", + poolLabel: "", + contLabel: "", + resLabel: "", + } + } + tr[repairLabel] = fmt.Sprintf("%d: %s", idx, choice.Info) + } + + actionTable = append(actionTable, tr) + } else { + if res := report.Resolution(); res != "" { + tr[resLabel] = res + } + resolvedTable = append(resolvedTable, tr) + } + } + + printReportTable(out, "Resolved", resolvedHasCont, true, resolvedTable) + printReportTable(out, "Action Required", actionHasCont, false, actionTable) +} + +func printReportTable(out io.Writer, title string, hasCont, resolved bool, table []txtfmt.TableRow) { + if len(table) == 0 { + return + } + + cols := []string{idLabel, classLabel, poolLabel} + if hasCont { + cols = append(cols, contLabel) + } + + if resolved { + cols = append(cols, resLabel) + } else { + cols = append(cols, repairLabel) + } + + tw := txtfmt.NewTableFormatter(cols...) 
+ fmt.Fprintf(out, "- %s:\n%s\n", title, tw.Format(table)) +} + +func checkerPoolID(report *control.SystemCheckReport, verbose bool) string { + poolID := report.PoolUuid + if report.PoolLabel != "" { + poolID = report.PoolLabel + if verbose { + poolID += fmt.Sprintf(" (%s)", report.PoolUuid) + } + } + return poolID +} + +func checkerContID(report *control.SystemCheckReport, verbose bool) string { + contID := report.ContUuid + if report.ContLabel != "" { + contID = report.ContLabel + if verbose { + contID += fmt.Sprintf(" (%s)", report.ContUuid) + } + } + return contID +} + +func printInconsistencyReportsVerbose(out io.Writer, resp *control.SystemCheckQueryResp) { + iw := txtfmt.NewIndentWriter(out) + for _, report := range resp.Reports { + cls := control.SystemCheckFindingClass(report.Class) + fmt.Fprintf(iw, "ID: 0x%x\n", report.Seq) + fmt.Fprintf(iw, "Class: %s\n", cls) + fmt.Fprintf(iw, "Message: %s\n", report.Msg) + fmt.Fprintf(iw, "Pool: %s\n", checkerPoolID(report, true)) + if report.ContUuid != "" { + fmt.Fprintf(iw, "Container: %s\n", checkerContID(report, true)) + } + if report.IsInteractive() { + fmt.Fprintf(iw, "Potential resolution actions:\n") + iw2 := txtfmt.NewIndentWriter(iw) + for i, choice := range report.RepairChoices() { + fmt.Fprintf(iw2, "%d: %s\n", i, choice.Info) + } + fmt.Fprintln(iw) + } else if res := report.Resolution(); res != "" { + fmt.Fprintf(iw, "Resolution: %s\n\n", res) + } else { + fmt.Fprintf(iw, "No resolutions available\n\n") + continue + } + } +} diff --git a/src/control/cmd/dmg/pretty/check_test.go b/src/control/cmd/dmg/pretty/check_test.go new file mode 100644 index 00000000000..4f2ff970995 --- /dev/null +++ b/src/control/cmd/dmg/pretty/check_test.go @@ -0,0 +1,538 @@ +// +// (C) Copyright 2023 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package pretty_test + +import ( + "bytes" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + "github.com/daos-stack/daos/src/control/cmd/dmg/pretty" + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/lib/control" +) + +func TestPretty_PrintCheckQueryResp(t *testing.T) { + checkTime, err := time.Parse(time.RFC822Z, "20 Mar 23 10:07 -0500") + if err != nil { + t.Fatal(err) + } + + for name, tc := range map[string]struct { + resp *control.SystemCheckQueryResp + verbose bool + expOut string + }{ + "empty": { + expOut: ` +DAOS System Checker Info + No results found. +`, + }, + "(verbose) 2 pools being checked": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusRunning, + ScanPhase: control.SystemCheckScanPhaseContainerList, + StartTime: checkTime, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(), + Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(), + Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), + StartTime: checkTime, + }, + }, + }, + verbose: true, + expOut: ` +DAOS System Checker Info + Current status: RUNNING (started at: 2023-03-20T10:07:00.000-05:00) + Current phase: CONT_LIST (Comparing container list on PS and storage nodes) + Checking 2 pools + +Per-Pool Checker Info: + Pool pool-1: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00 + Pool pool-2: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00 + +No reports to display. 
+`, + }, + "(verbose) 3 pools repaired; 1 unchecked; 1 removed": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-3": { + UUID: "pool-3", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-5": { + UUID: "pool-5", + Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL, + Action: chkpb.CheckInconsistAction_CIA_IGNORE, + Msg: "message 1", + PoolUuid: "pool-1", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 2, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + Msg: "message 2", + PoolUuid: "pool-2", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 3, + Class: chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + Msg: "message 3", + PoolUuid: "pool-3", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 4, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_DISCARD, + Msg: "message 4", + PoolUuid: "pool-4", + }, + }, + }, + }, + verbose: true, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 4 pools + +Per-Pool Checker Info: + Pool pool-1: 0 ranks, status: CPS_CHECKED, 
phase: CSP_DONE, started: 2023-03-20T10:07:00.000-05:00 + Pool pool-2: 0 ranks, status: CPS_CHECKED, phase: CSP_DONE, started: 2023-03-20T10:07:00.000-05:00 + Pool pool-3: 0 ranks, status: CPS_CHECKED, phase: CSP_DONE, started: 2023-03-20T10:07:00.000-05:00 + Pool pool-5: 0 ranks, status: CPS_UNCHECKED, phase: CSP_PREPARE + +Inconsistency Reports: + ID: 0x1 + Class: POOL_BAD_SVCL + Message: message 1 + Pool: pool-1 + Resolution: IGNORE + + ID: 0x2 + Class: POOL_BAD_LABEL + Message: message 2 + Pool: pool-2 + Resolution: TRUST_MS + + ID: 0x3 + Class: POOL_LESS_SVC_WITHOUT_QUORUM + Message: message 3 + Pool: pool-3 + Resolution: TRUST_PS + + ID: 0x4 + Class: POOL_NONEXIST_ON_ENGINE + Message: message 4 + Pool: pool-4 + Resolution: DISCARD + +`, + }, + "non-verbose": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-3": { + UUID: "pool-3", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-5": { + UUID: "pool-5", + Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL, + Action: chkpb.CheckInconsistAction_CIA_IGNORE, + Msg: "message 1", + PoolUuid: "pool-1", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 2, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, 
+ Msg: "message 2", + PoolUuid: "pool-2", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 3, + Class: chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + Msg: "message 3", + PoolUuid: "pool-3", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 4, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_DISCARD, + Msg: "message 4", + PoolUuid: "pool-4", + }, + }, + }, + }, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 4 pools + +Inconsistency Reports: +- Resolved: +ID Class Pool Resolution +-- ----- ---- ---------- +0x1 POOL_BAD_SVCL pool-1 IGNORE +0x2 POOL_BAD_LABEL pool-2 TRUST_MS +0x3 POOL_LESS_SVC_WITHOUT_QUORUM pool-3 TRUST_PS +0x4 POOL_NONEXIST_ON_ENGINE pool-4 DISCARD + +`, + }, + "non-verbose with container": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_IGNORE, + Msg: "message 1", + PoolUuid: "pool-1", + ContUuid: "cont-1", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 2, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + Msg: "message 2", + PoolUuid: "pool-2", + }, + }, + }, + }, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current 
phase: DONE (Check completed) + Checked 2 pools + +Inconsistency Reports: +- Resolved: +ID Class Pool Cont Resolution +-- ----- ---- ---- ---------- +0x1 CONT_NONEXIST_ON_PS pool-1 cont-1 IGNORE +0x2 POOL_BAD_LABEL pool-2 None TRUST_MS + +`, + }, + "non-verbose with resolved and interactive": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Msg: "message 1", + PoolUuid: "pool-1", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + }, + ActDetails: []string{"trust MS details", "trust PS details"}, + ActMsgs: []string{"trust MS", "trust PS"}, + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 2, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + Msg: "message 2", + PoolUuid: "pool-2", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 3, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Msg: "message 3", + PoolUuid: "pool-2", + ContUuid: "cont-1", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_IGNORE, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + ActDetails: []string{"ignore details", "trust PS details", "trust MS details"}, + ActMsgs: 
[]string{"ignore", "trust PS", "trust MS"}, + }, + }, + }, + }, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 2 pools + +Inconsistency Reports: +- Resolved: +ID Class Pool Resolution +-- ----- ---- ---------- +0x2 POOL_BAD_LABEL pool-2 TRUST_MS + +- Action Required: +ID Class Pool Cont Repair Options +-- ----- ---- ---- -------------- +0x1 POOL_BAD_LABEL pool-1 None 0: trust MS details + 1: trust PS details +0x3 CONT_NONEXIST_ON_PS pool-2 cont-1 0: ignore details + 1: trust PS details + 2: trust MS details + +`, + }, + "non-verbose with interactive only": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Msg: "message 1", + PoolUuid: "pool-1", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + }, + ActDetails: []string{"trust MS details", "trust PS details"}, + ActMsgs: []string{"trust MS", "trust PS"}, + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 3, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Msg: "message 3", + PoolUuid: "pool-2", + ContUuid: "cont-1", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_IGNORE, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + 
chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + ActDetails: []string{"ignore details", "trust PS details", "trust MS details"}, + ActMsgs: []string{"ignore", "trust PS", "trust MS"}, + }, + }, + }, + }, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 2 pools + +Inconsistency Reports: +- Action Required: +ID Class Pool Cont Repair Options +-- ----- ---- ---- -------------- +0x1 POOL_BAD_LABEL pool-1 None 0: trust MS details + 1: trust PS details +0x3 CONT_NONEXIST_ON_PS pool-2 cont-1 0: ignore details + 1: trust PS details + 2: trust MS details + +`, + }, + "verbose interactive": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Msg: "message 1", + PoolUuid: "pool-1", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + }, + ActDetails: []string{"trust MS details", "trust PS details"}, + ActMsgs: []string{"trust MS", "trust PS"}, + }, + }, + }, + }, + verbose: true, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 1 pool + +Per-Pool Checker Info: + Pool pool-1: 0 ranks, status: CPS_CHECKED, phase: CSP_DONE, started: 2023-03-20T10:07:00.000-05:00 + +Inconsistency Reports: + ID: 0x1 + Class: POOL_BAD_LABEL + Message: message 1 + Pool: pool-1 + Potential resolution actions: + 0: trust MS details + 1: trust PS details + +`, + }, + } { + t.Run(name, func(t *testing.T) { 
+ var buf bytes.Buffer + pretty.PrintCheckQueryResp(&buf, tc.resp, tc.verbose) + got := buf.String() + if diff := cmp.Diff(strings.TrimLeft(tc.expOut, "\n"), got); diff != "" { + t.Fatalf("unexpected output (-want, +got):\n%s", diff) + } + }) + } +} diff --git a/src/control/common/proto/chk/chk.pb.go b/src/control/common/proto/chk/chk.pb.go new file mode 100644 index 00000000000..54be7068a93 --- /dev/null +++ b/src/control/common/proto/chk/chk.pb.go @@ -0,0 +1,965 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.30.0 +// protoc v3.5.0 +// source: chk/chk.proto + +package chk + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// Kinds of DAOS global inconsistency. +type CheckInconsistClass int32 + +const ( + // Consistent cases. + CheckInconsistClass_CIC_NONE CheckInconsistClass = 0 + // Only a subset of the pool services are present but we will have a quorum. + // Default action: CIA_IGNORE. + CheckInconsistClass_CIC_POOL_LESS_SVC_WITH_QUORUM CheckInconsistClass = 1 + // Only a subset of the pool services are present, and we don't have a quorum. + // Default action: CIA_INTERACT. + CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM CheckInconsistClass = 2 + // More members are reported than the pool service was created with. + // Default action: CIA_DISCARD. Remove unrecognized pool service. + CheckInconsistClass_CIC_POOL_MORE_SVC CheckInconsistClass = 3 + // Engine(s) claim the pool which is not registered to MS. + // Default action: CIA_READD. 
Register the pool to the MS. + CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS CheckInconsistClass = 4 + // Pool is registered to MS but not claimed by any engine. + // Default action: CIA_DISCARD. De-register pool from MS. + CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE CheckInconsistClass = 5 + // Svcl list stored in MS does not match the actual PS membership. + // Default action: CIA_TRUST_PS. Refresh svcl list in MS DB. + CheckInconsistClass_CIC_POOL_BAD_SVCL CheckInconsistClass = 6 + // The pool label recorded by MS does not match the pool label property from PS. + // Default action: CIA_TRUST_PS. Refresh label in MS DB. + CheckInconsistClass_CIC_POOL_BAD_LABEL CheckInconsistClass = 7 + // An engine has some allocated storage but does not appear in pool map. + // Default action: CIA_DISCARD. Associated files and blobs will be deleted from the engine. + CheckInconsistClass_CIC_ENGINE_NONEXIST_IN_MAP CheckInconsistClass = 8 + // An engine has some allocated storage and is marked as down/downout in pool map. + // Default action: CIA_IGNORE. It can be reintegrated after CR scan. + CheckInconsistClass_CIC_ENGINE_DOWN_IN_MAP CheckInconsistClass = 9 + // An engine is referenced in pool map, but no storage is actually allocated on this engine. + // Default action: CIA_DISCARD. Evict the rank from the pool map and leave the rest to rebuild. + CheckInconsistClass_CIC_ENGINE_HAS_NO_STORAGE CheckInconsistClass = 10 + // Containers that have storage allocated on engine but do not exist in the PS. + // Default action: CIA_DISCARD. Destroy the unrecognized container. + CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS CheckInconsistClass = 11 + // The container label recorded by PS does not match the container label property. + // Default action: CIA_TRUST_PS. Refresh label property on related target(s). + CheckInconsistClass_CIC_CONT_BAD_LABEL CheckInconsistClass = 12 + // The DTX is corrupted, some participant RDG(s) may be lost. + // Default action: CIA_INTERACT. 
+ CheckInconsistClass_CIC_DTX_CORRUPTED CheckInconsistClass = 13 + // The DTX entry on the leader does not exist, so its status is unknown. + // Default action: CIA_DISCARD. It is equivalent to aborting the DTX and may lose data on the related + // shard; we may then find data inconsistency in a subsequent CR scan phase, at which time + // such data inconsistency will be fixed. + CheckInconsistClass_CIC_DTX_ORPHAN CheckInconsistClass = 14 + // The checksum information is lost. + // Default action: CIA_READD. We have to trust the data and recalculate the checksum. If + // data is corrupted, then we may hit data inconsistency in a subsequent CR scan phase, at + // which time such data inconsistency will be fixed. + CheckInconsistClass_CIC_CSUM_LOST CheckInconsistClass = 15 + // Checksum related inconsistency or data corruption. + // Default action: CIA_DISCARD. We will then hit data loss in a subsequent CR scan phase, + // at which time such data inconsistency will be fixed. + CheckInconsistClass_CIC_CSUM_FAILURE CheckInconsistClass = 16 + // Replicated object lost some replica(s). + // Default action: CIA_READD. Copy from another valid replica. + CheckInconsistClass_CIC_OBJ_LOST_REP CheckInconsistClass = 17 + // EC object lost parity or data shard(s). + // Default action: CIA_READD. Trust other available shards and recalculate the lost one(s). + CheckInconsistClass_CIC_OBJ_LOST_EC_SHARD CheckInconsistClass = 18 + // EC object lost too many shards, exceeding its redundancy. + // Default action: CIA_INTERACT. Ask the admin to decide whether to keep or remove the object. + CheckInconsistClass_CIC_OBJ_LOST_EC_DATA CheckInconsistClass = 19 + // Data inconsistency among replicas. + // Default action: CIA_TRUST_LATEST. Try to keep the latest data. If all have the same epoch, + // then ask the admin (CIA_INTERACT) to decide which one will be trusted. + CheckInconsistClass_CIC_OBJ_DATA_INCONSIST CheckInconsistClass = 20 + // Unknown inconsistency. + // Default action: CIA_IGNORE. 
+ CheckInconsistClass_CIC_UNKNOWN CheckInconsistClass = 100 +) + +// Enum value maps for CheckInconsistClass. +var ( + CheckInconsistClass_name = map[int32]string{ + 0: "CIC_NONE", + 1: "CIC_POOL_LESS_SVC_WITH_QUORUM", + 2: "CIC_POOL_LESS_SVC_WITHOUT_QUORUM", + 3: "CIC_POOL_MORE_SVC", + 4: "CIC_POOL_NONEXIST_ON_MS", + 5: "CIC_POOL_NONEXIST_ON_ENGINE", + 6: "CIC_POOL_BAD_SVCL", + 7: "CIC_POOL_BAD_LABEL", + 8: "CIC_ENGINE_NONEXIST_IN_MAP", + 9: "CIC_ENGINE_DOWN_IN_MAP", + 10: "CIC_ENGINE_HAS_NO_STORAGE", + 11: "CIC_CONT_NONEXIST_ON_PS", + 12: "CIC_CONT_BAD_LABEL", + 13: "CIC_DTX_CORRUPTED", + 14: "CIC_DTX_ORPHAN", + 15: "CIC_CSUM_LOST", + 16: "CIC_CSUM_FAILURE", + 17: "CIC_OBJ_LOST_REP", + 18: "CIC_OBJ_LOST_EC_SHARD", + 19: "CIC_OBJ_LOST_EC_DATA", + 20: "CIC_OBJ_DATA_INCONSIST", + 100: "CIC_UNKNOWN", + } + CheckInconsistClass_value = map[string]int32{ + "CIC_NONE": 0, + "CIC_POOL_LESS_SVC_WITH_QUORUM": 1, + "CIC_POOL_LESS_SVC_WITHOUT_QUORUM": 2, + "CIC_POOL_MORE_SVC": 3, + "CIC_POOL_NONEXIST_ON_MS": 4, + "CIC_POOL_NONEXIST_ON_ENGINE": 5, + "CIC_POOL_BAD_SVCL": 6, + "CIC_POOL_BAD_LABEL": 7, + "CIC_ENGINE_NONEXIST_IN_MAP": 8, + "CIC_ENGINE_DOWN_IN_MAP": 9, + "CIC_ENGINE_HAS_NO_STORAGE": 10, + "CIC_CONT_NONEXIST_ON_PS": 11, + "CIC_CONT_BAD_LABEL": 12, + "CIC_DTX_CORRUPTED": 13, + "CIC_DTX_ORPHAN": 14, + "CIC_CSUM_LOST": 15, + "CIC_CSUM_FAILURE": 16, + "CIC_OBJ_LOST_REP": 17, + "CIC_OBJ_LOST_EC_SHARD": 18, + "CIC_OBJ_LOST_EC_DATA": 19, + "CIC_OBJ_DATA_INCONSIST": 20, + "CIC_UNKNOWN": 100, + } +) + +func (x CheckInconsistClass) Enum() *CheckInconsistClass { + p := new(CheckInconsistClass) + *p = x + return p +} + +func (x CheckInconsistClass) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckInconsistClass) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[0].Descriptor() +} + +func (CheckInconsistClass) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[0] +} 
+ +func (x CheckInconsistClass) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckInconsistClass.Descriptor instead. +func (CheckInconsistClass) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{0} +} + +// Actions for how to handle kinds of inconsistency. +type CheckInconsistAction int32 + +const ( + // Default action, depends on the detailed inconsistency class. + CheckInconsistAction_CIA_DEFAULT CheckInconsistAction = 0 + // Interact with administrator for further action. + CheckInconsistAction_CIA_INTERACT CheckInconsistAction = 1 + // Ignore but log the inconsistency. + CheckInconsistAction_CIA_IGNORE CheckInconsistAction = 2 + // Discard the unrecognized element: pool service, pool itself, container, and so on. + CheckInconsistAction_CIA_DISCARD CheckInconsistAction = 3 + // Re-add the missing element: pool to MS, target to pool map, and so on. + CheckInconsistAction_CIA_READD CheckInconsistAction = 4 + // Trust the information recorded in MS DB. + CheckInconsistAction_CIA_TRUST_MS CheckInconsistAction = 5 + // Trust the information recorded in PS DB. + CheckInconsistAction_CIA_TRUST_PS CheckInconsistAction = 6 + // Trust the information recorded by target(s). + CheckInconsistAction_CIA_TRUST_TARGET CheckInconsistAction = 7 + // Trust the majority parts (if have). + CheckInconsistAction_CIA_TRUST_MAJORITY CheckInconsistAction = 8 + // Trust the one with latest (pool map or epoch) information. Keep the latest data. + CheckInconsistAction_CIA_TRUST_LATEST CheckInconsistAction = 9 + // Trust the one with oldest (pool map or epoch) information. Rollback to old version. + CheckInconsistAction_CIA_TRUST_OLDEST CheckInconsistAction = 10 + // Trust EC parity shard. + CheckInconsistAction_CIA_TRUST_EC_PARITY CheckInconsistAction = 11 + // Trust EC data shard. + CheckInconsistAction_CIA_TRUST_EC_DATA CheckInconsistAction = 12 +) + +// Enum value maps for CheckInconsistAction. 
+var ( + CheckInconsistAction_name = map[int32]string{ + 0: "CIA_DEFAULT", + 1: "CIA_INTERACT", + 2: "CIA_IGNORE", + 3: "CIA_DISCARD", + 4: "CIA_READD", + 5: "CIA_TRUST_MS", + 6: "CIA_TRUST_PS", + 7: "CIA_TRUST_TARGET", + 8: "CIA_TRUST_MAJORITY", + 9: "CIA_TRUST_LATEST", + 10: "CIA_TRUST_OLDEST", + 11: "CIA_TRUST_EC_PARITY", + 12: "CIA_TRUST_EC_DATA", + } + CheckInconsistAction_value = map[string]int32{ + "CIA_DEFAULT": 0, + "CIA_INTERACT": 1, + "CIA_IGNORE": 2, + "CIA_DISCARD": 3, + "CIA_READD": 4, + "CIA_TRUST_MS": 5, + "CIA_TRUST_PS": 6, + "CIA_TRUST_TARGET": 7, + "CIA_TRUST_MAJORITY": 8, + "CIA_TRUST_LATEST": 9, + "CIA_TRUST_OLDEST": 10, + "CIA_TRUST_EC_PARITY": 11, + "CIA_TRUST_EC_DATA": 12, + } +) + +func (x CheckInconsistAction) Enum() *CheckInconsistAction { + p := new(CheckInconsistAction) + *p = x + return p +} + +func (x CheckInconsistAction) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckInconsistAction) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[1].Descriptor() +} + +func (CheckInconsistAction) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[1] +} + +func (x CheckInconsistAction) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckInconsistAction.Descriptor instead. +func (CheckInconsistAction) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{1} +} + +// The flags to control DAOS check general behavior, not related with any detailed inconsistency. +type CheckFlag int32 + +const ( + CheckFlag_CF_NONE CheckFlag = 0 + // Only scan without real repairing inconsistency. + CheckFlag_CF_DRYRUN CheckFlag = 1 + // Start DAOS check from the beginning. + // Otherwise, resume the DAOS check from the latest checkpoint by default. + CheckFlag_CF_RESET CheckFlag = 2 + // Stop DAOS check if hit unknown inconsistency or fail to repair some inconsistency. 
+ // Otherwise, mark 'fail' on related component and continue to handle next one by default. + CheckFlag_CF_FAILOUT CheckFlag = 4 + // If the admin does not want to interact with engine during check scan, then CIA_INTERACT + // will be converted to CIA_IGNORE. That will overwrite the CheckInconsistPolicy. + CheckFlag_CF_AUTO CheckFlag = 8 + // Handle orphan pool when start the check instance. If not specify the flag, some orphan + // pool(s) may be not handled (by default) unless all pools are checked from the scratch. + CheckFlag_CF_ORPHAN_POOL CheckFlag = 16 + // Overwrite former set CF_FAILOUT flag, cannot be specified together with CF_FAILOUT. + CheckFlag_CF_NO_FAILOUT CheckFlag = 32 + // Overwrite former set CF_AUTO flag, cannot be specified together with CF_AUTO. + CheckFlag_CF_NO_AUTO CheckFlag = 64 +) + +// Enum value maps for CheckFlag. +var ( + CheckFlag_name = map[int32]string{ + 0: "CF_NONE", + 1: "CF_DRYRUN", + 2: "CF_RESET", + 4: "CF_FAILOUT", + 8: "CF_AUTO", + 16: "CF_ORPHAN_POOL", + 32: "CF_NO_FAILOUT", + 64: "CF_NO_AUTO", + } + CheckFlag_value = map[string]int32{ + "CF_NONE": 0, + "CF_DRYRUN": 1, + "CF_RESET": 2, + "CF_FAILOUT": 4, + "CF_AUTO": 8, + "CF_ORPHAN_POOL": 16, + "CF_NO_FAILOUT": 32, + "CF_NO_AUTO": 64, + } +) + +func (x CheckFlag) Enum() *CheckFlag { + p := new(CheckFlag) + *p = x + return p +} + +func (x CheckFlag) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckFlag) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[2].Descriptor() +} + +func (CheckFlag) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[2] +} + +func (x CheckFlag) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckFlag.Descriptor instead. +func (CheckFlag) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{2} +} + +// The status of DAOS check instance. 
+type CheckInstStatus int32 + +const ( + CheckInstStatus_CIS_INIT CheckInstStatus = 0 // DAOS check has never been run. + CheckInstStatus_CIS_RUNNING CheckInstStatus = 1 // DAOS check is still in process. + CheckInstStatus_CIS_COMPLETED CheckInstStatus = 2 // All passes have been done for all required pools. + CheckInstStatus_CIS_STOPPED CheckInstStatus = 3 // DAOS check has been explicitly stopped, do not allow to rejoin. + CheckInstStatus_CIS_FAILED CheckInstStatus = 4 // DAOS check auto stopped for some unrecoverable failure, do not rejoin. + CheckInstStatus_CIS_PAUSED CheckInstStatus = 5 // DAOS check has been paused because engine exit, allow to rejoin. + CheckInstStatus_CIS_IMPLICATED CheckInstStatus = 6 // Check on the engine exit for other engine failure, do not rejoin. +) + +// Enum value maps for CheckInstStatus. +var ( + CheckInstStatus_name = map[int32]string{ + 0: "CIS_INIT", + 1: "CIS_RUNNING", + 2: "CIS_COMPLETED", + 3: "CIS_STOPPED", + 4: "CIS_FAILED", + 5: "CIS_PAUSED", + 6: "CIS_IMPLICATED", + } + CheckInstStatus_value = map[string]int32{ + "CIS_INIT": 0, + "CIS_RUNNING": 1, + "CIS_COMPLETED": 2, + "CIS_STOPPED": 3, + "CIS_FAILED": 4, + "CIS_PAUSED": 5, + "CIS_IMPLICATED": 6, + } +) + +func (x CheckInstStatus) Enum() *CheckInstStatus { + p := new(CheckInstStatus) + *p = x + return p +} + +func (x CheckInstStatus) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckInstStatus) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[3].Descriptor() +} + +func (CheckInstStatus) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[3] +} + +func (x CheckInstStatus) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckInstStatus.Descriptor instead. +func (CheckInstStatus) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{3} +} + +// The pool status for DAOS check. 
+type CheckPoolStatus int32 + +const ( + CheckPoolStatus_CPS_UNCHECKED CheckPoolStatus = 0 // DAOS check has not started against this pool. + CheckPoolStatus_CPS_CHECKING CheckPoolStatus = 1 // The pool is being checked. + CheckPoolStatus_CPS_CHECKED CheckPoolStatus = 2 // DAOS check has successfully completed all the passes on this pool. + CheckPoolStatus_CPS_FAILED CheckPoolStatus = 3 // DAOS check could not be completed due to some unrecoverable failure. + CheckPoolStatus_CPS_PAUSED CheckPoolStatus = 4 // Checking the pool has been paused because engine exit. + CheckPoolStatus_CPS_PENDING CheckPoolStatus = 5 // Waiting for the decision from the admin. + CheckPoolStatus_CPS_STOPPED CheckPoolStatus = 6 // DAOS check on the pool has been stopped explicitly. + CheckPoolStatus_CPS_IMPLICATED CheckPoolStatus = 7 // Check on the pool is stopped because of other pool or engine failure. +) + +// Enum value maps for CheckPoolStatus. +var ( + CheckPoolStatus_name = map[int32]string{ + 0: "CPS_UNCHECKED", + 1: "CPS_CHECKING", + 2: "CPS_CHECKED", + 3: "CPS_FAILED", + 4: "CPS_PAUSED", + 5: "CPS_PENDING", + 6: "CPS_STOPPED", + 7: "CPS_IMPLICATED", + } + CheckPoolStatus_value = map[string]int32{ + "CPS_UNCHECKED": 0, + "CPS_CHECKING": 1, + "CPS_CHECKED": 2, + "CPS_FAILED": 3, + "CPS_PAUSED": 4, + "CPS_PENDING": 5, + "CPS_STOPPED": 6, + "CPS_IMPLICATED": 7, + } +) + +func (x CheckPoolStatus) Enum() *CheckPoolStatus { + p := new(CheckPoolStatus) + *p = x + return p +} + +func (x CheckPoolStatus) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckPoolStatus) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[4].Descriptor() +} + +func (CheckPoolStatus) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[4] +} + +func (x CheckPoolStatus) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckPoolStatus.Descriptor instead. 
+func (CheckPoolStatus) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{4} +} + +// DAOS check engine scan phases. +type CheckScanPhase int32 + +const ( + CheckScanPhase_CSP_PREPARE CheckScanPhase = 0 // Initial phase, prepare to start check on related engines. + CheckScanPhase_CSP_POOL_LIST CheckScanPhase = 1 // Pool list consolidation. + CheckScanPhase_CSP_POOL_MBS CheckScanPhase = 2 // Pool membership. + CheckScanPhase_CSP_POOL_CLEANUP CheckScanPhase = 3 // Pool cleanup. + CheckScanPhase_CSP_CONT_LIST CheckScanPhase = 4 // Container list consolidation. + CheckScanPhase_CSP_CONT_CLEANUP CheckScanPhase = 5 // Container cleanup. + CheckScanPhase_CSP_DTX_RESYNC CheckScanPhase = 6 // DTX resync and cleanup. + CheckScanPhase_CSP_OBJ_SCRUB CheckScanPhase = 7 // RP/EC shards consistency verification with checksum scrub if have. + CheckScanPhase_CSP_REBUILD CheckScanPhase = 8 // Object rebuild. + CheckScanPhase_CSP_AGGREGATION CheckScanPhase = 9 // EC aggregation & VOS aggregation. + CheckScanPhase_CSP_DONE CheckScanPhase = 10 // All done. +) + +// Enum value maps for CheckScanPhase. 
+var ( + CheckScanPhase_name = map[int32]string{ + 0: "CSP_PREPARE", + 1: "CSP_POOL_LIST", + 2: "CSP_POOL_MBS", + 3: "CSP_POOL_CLEANUP", + 4: "CSP_CONT_LIST", + 5: "CSP_CONT_CLEANUP", + 6: "CSP_DTX_RESYNC", + 7: "CSP_OBJ_SCRUB", + 8: "CSP_REBUILD", + 9: "CSP_AGGREGATION", + 10: "CSP_DONE", + } + CheckScanPhase_value = map[string]int32{ + "CSP_PREPARE": 0, + "CSP_POOL_LIST": 1, + "CSP_POOL_MBS": 2, + "CSP_POOL_CLEANUP": 3, + "CSP_CONT_LIST": 4, + "CSP_CONT_CLEANUP": 5, + "CSP_DTX_RESYNC": 6, + "CSP_OBJ_SCRUB": 7, + "CSP_REBUILD": 8, + "CSP_AGGREGATION": 9, + "CSP_DONE": 10, + } +) + +func (x CheckScanPhase) Enum() *CheckScanPhase { + p := new(CheckScanPhase) + *p = x + return p +} + +func (x CheckScanPhase) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CheckScanPhase) Descriptor() protoreflect.EnumDescriptor { + return file_chk_chk_proto_enumTypes[5].Descriptor() +} + +func (CheckScanPhase) Type() protoreflect.EnumType { + return &file_chk_chk_proto_enumTypes[5] +} + +func (x CheckScanPhase) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CheckScanPhase.Descriptor instead. +func (CheckScanPhase) EnumDescriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{5} +} + +// DAOS check engine reports the found inconsistency and repair result to control plane. +// If the repair action is CIA_INTERACT, then the control plane will reply current dRPC +// firstly, and then interact with the admin for the repair decision in another section +// and tell DAOS check engine via another DRPC_METHOD_MGMT_CHK_ACT dRPC call. +// +// If the CheckReport::msg is not enough to help admin to make the decision, then we +// may have to leverage DAOS debug tools to dump more information from related target. 
+type CheckReport struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Seq uint64 `protobuf:"varint,1,opt,name=seq,proto3" json:"seq,omitempty"` // DAOS Check event sequence, unique for the instance. + Class CheckInconsistClass `protobuf:"varint,2,opt,name=class,proto3,enum=chk.CheckInconsistClass" json:"class,omitempty"` // Inconsistency class + Action CheckInconsistAction `protobuf:"varint,3,opt,name=action,proto3,enum=chk.CheckInconsistAction" json:"action,omitempty"` // The action taken to repair the inconsistency + // Repair result: zero is for repaired successfully. + // + // negative value if failed to repair. + // positive value is for CIA_IGNORE or dryrun mode. + // + // It is meaningless if the action is CIA_INTERACT. + Result int32 `protobuf:"varint,4,opt,name=result,proto3" json:"result,omitempty"` + Rank uint32 `protobuf:"varint,5,opt,name=rank,proto3" json:"rank,omitempty"` // Inconsistency happened on which rank if applicable. + Target uint32 `protobuf:"varint,6,opt,name=target,proto3" json:"target,omitempty"` // Inconsistency happened on which target in the rank if applicable. + PoolUuid string `protobuf:"bytes,7,opt,name=pool_uuid,json=poolUuid,proto3" json:"pool_uuid,omitempty"` // The consistency is in which pool if applicable. + PoolLabel string `protobuf:"bytes,8,opt,name=pool_label,json=poolLabel,proto3" json:"pool_label,omitempty"` // The pool label, if available. + ContUuid string `protobuf:"bytes,9,opt,name=cont_uuid,json=contUuid,proto3" json:"cont_uuid,omitempty"` // The consistency is in which container if applicable. + ContLabel string `protobuf:"bytes,10,opt,name=cont_label,json=contLabel,proto3" json:"cont_label,omitempty"` // The container label, if available. + Objid string `protobuf:"bytes,11,opt,name=objid,proto3" json:"objid,omitempty"` // The consistency is in which object if applicable. 
+ Dkey string `protobuf:"bytes,12,opt,name=dkey,proto3" json:"dkey,omitempty"` // The consistency is in which dkey if applicable. + Akey string `protobuf:"bytes,13,opt,name=akey,proto3" json:"akey,omitempty"` // The consistency is in which akey if applicable. + Timestamp string `protobuf:"bytes,14,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // The time of report (and repair) the inconsistency. + Msg string `protobuf:"bytes,15,opt,name=msg,proto3" json:"msg,omitempty"` // Information to describe the inconsistency in detail. + ActChoices []CheckInconsistAction `protobuf:"varint,16,rep,packed,name=act_choices,json=actChoices,proto3,enum=chk.CheckInconsistAction" json:"act_choices,omitempty"` // Interactive mode options (first is suggested). + ActDetails []string `protobuf:"bytes,17,rep,name=act_details,json=actDetails,proto3" json:"act_details,omitempty"` // Details for each potential action (length should match actions). + ActMsgs []string `protobuf:"bytes,18,rep,name=act_msgs,json=actMsgs,proto3" json:"act_msgs,omitempty"` // Formatted messages containing details for each action choice. +} + +func (x *CheckReport) Reset() { + *x = CheckReport{} + if protoimpl.UnsafeEnabled { + mi := &file_chk_chk_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckReport) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckReport) ProtoMessage() {} + +func (x *CheckReport) ProtoReflect() protoreflect.Message { + mi := &file_chk_chk_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckReport.ProtoReflect.Descriptor instead. 
+func (*CheckReport) Descriptor() ([]byte, []int) { + return file_chk_chk_proto_rawDescGZIP(), []int{0} +} + +func (x *CheckReport) GetSeq() uint64 { + if x != nil { + return x.Seq + } + return 0 +} + +func (x *CheckReport) GetClass() CheckInconsistClass { + if x != nil { + return x.Class + } + return CheckInconsistClass_CIC_NONE +} + +func (x *CheckReport) GetAction() CheckInconsistAction { + if x != nil { + return x.Action + } + return CheckInconsistAction_CIA_DEFAULT +} + +func (x *CheckReport) GetResult() int32 { + if x != nil { + return x.Result + } + return 0 +} + +func (x *CheckReport) GetRank() uint32 { + if x != nil { + return x.Rank + } + return 0 +} + +func (x *CheckReport) GetTarget() uint32 { + if x != nil { + return x.Target + } + return 0 +} + +func (x *CheckReport) GetPoolUuid() string { + if x != nil { + return x.PoolUuid + } + return "" +} + +func (x *CheckReport) GetPoolLabel() string { + if x != nil { + return x.PoolLabel + } + return "" +} + +func (x *CheckReport) GetContUuid() string { + if x != nil { + return x.ContUuid + } + return "" +} + +func (x *CheckReport) GetContLabel() string { + if x != nil { + return x.ContLabel + } + return "" +} + +func (x *CheckReport) GetObjid() string { + if x != nil { + return x.Objid + } + return "" +} + +func (x *CheckReport) GetDkey() string { + if x != nil { + return x.Dkey + } + return "" +} + +func (x *CheckReport) GetAkey() string { + if x != nil { + return x.Akey + } + return "" +} + +func (x *CheckReport) GetTimestamp() string { + if x != nil { + return x.Timestamp + } + return "" +} + +func (x *CheckReport) GetMsg() string { + if x != nil { + return x.Msg + } + return "" +} + +func (x *CheckReport) GetActChoices() []CheckInconsistAction { + if x != nil { + return x.ActChoices + } + return nil +} + +func (x *CheckReport) GetActDetails() []string { + if x != nil { + return x.ActDetails + } + return nil +} + +func (x *CheckReport) GetActMsgs() []string { + if x != nil { + return x.ActMsgs + } + return 
nil +} + +var File_chk_chk_proto protoreflect.FileDescriptor + +var file_chk_chk_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x63, 0x68, 0x6b, 0x2f, 0x63, 0x68, 0x6b, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x03, 0x63, 0x68, 0x6b, 0x22, 0xa4, 0x04, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, + 0x70, 0x6f, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x65, 0x71, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x73, 0x65, 0x71, 0x12, 0x2e, 0x0a, 0x05, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x52, + 0x05, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x31, 0x0a, 0x06, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x52, 0x06, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x18, + 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x12, 0x1b, 0x0a, + 0x09, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x75, 0x69, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x6f, + 0x6f, 0x6c, 0x5f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, + 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x6f, + 0x6e, 0x74, 0x55, 0x75, 0x69, 
0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x5f, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, + 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x14, 0x0a, 0x05, 0x6f, 0x62, 0x6a, 0x69, 0x64, 0x18, 0x0b, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6f, 0x62, 0x6a, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x64, + 0x6b, 0x65, 0x79, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x64, 0x6b, 0x65, 0x79, 0x12, + 0x12, 0x0a, 0x04, 0x61, 0x6b, 0x65, 0x79, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x61, + 0x6b, 0x65, 0x79, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, + 0x70, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, 0x67, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x6d, 0x73, 0x67, 0x12, 0x3a, 0x0a, 0x0b, 0x61, 0x63, 0x74, 0x5f, 0x63, 0x68, 0x6f, 0x69, 0x63, + 0x65, 0x73, 0x18, 0x10, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x61, 0x63, 0x74, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x12, + 0x1f, 0x0a, 0x0b, 0x61, 0x63, 0x74, 0x5f, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x18, 0x11, + 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x61, 0x63, 0x74, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, + 0x12, 0x19, 0x0a, 0x08, 0x61, 0x63, 0x74, 0x5f, 0x6d, 0x73, 0x67, 0x73, 0x18, 0x12, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x07, 0x61, 0x63, 0x74, 0x4d, 0x73, 0x67, 0x73, 0x2a, 0xcc, 0x04, 0x0a, 0x13, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x43, 0x6c, + 0x61, 0x73, 0x73, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x49, 0x43, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, + 0x00, 0x12, 0x21, 0x0a, 0x1d, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4c, 0x45, + 0x53, 0x53, 0x5f, 0x53, 0x56, 0x43, 0x5f, 0x57, 0x49, 0x54, 0x48, 
0x5f, 0x51, 0x55, 0x4f, 0x52, + 0x55, 0x4d, 0x10, 0x01, 0x12, 0x24, 0x0a, 0x20, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, + 0x5f, 0x4c, 0x45, 0x53, 0x53, 0x5f, 0x53, 0x56, 0x43, 0x5f, 0x57, 0x49, 0x54, 0x48, 0x4f, 0x55, + 0x54, 0x5f, 0x51, 0x55, 0x4f, 0x52, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x15, 0x0a, 0x11, 0x43, 0x49, + 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4d, 0x4f, 0x52, 0x45, 0x5f, 0x53, 0x56, 0x43, 0x10, + 0x03, 0x12, 0x1b, 0x0a, 0x17, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4e, 0x4f, + 0x4e, 0x45, 0x58, 0x49, 0x53, 0x54, 0x5f, 0x4f, 0x4e, 0x5f, 0x4d, 0x53, 0x10, 0x04, 0x12, 0x1f, + 0x0a, 0x1b, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x58, + 0x49, 0x53, 0x54, 0x5f, 0x4f, 0x4e, 0x5f, 0x45, 0x4e, 0x47, 0x49, 0x4e, 0x45, 0x10, 0x05, 0x12, + 0x15, 0x0a, 0x11, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x42, 0x41, 0x44, 0x5f, + 0x53, 0x56, 0x43, 0x4c, 0x10, 0x06, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x49, 0x43, 0x5f, 0x50, 0x4f, + 0x4f, 0x4c, 0x5f, 0x42, 0x41, 0x44, 0x5f, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x10, 0x07, 0x12, 0x1e, + 0x0a, 0x1a, 0x43, 0x49, 0x43, 0x5f, 0x45, 0x4e, 0x47, 0x49, 0x4e, 0x45, 0x5f, 0x4e, 0x4f, 0x4e, + 0x45, 0x58, 0x49, 0x53, 0x54, 0x5f, 0x49, 0x4e, 0x5f, 0x4d, 0x41, 0x50, 0x10, 0x08, 0x12, 0x1a, + 0x0a, 0x16, 0x43, 0x49, 0x43, 0x5f, 0x45, 0x4e, 0x47, 0x49, 0x4e, 0x45, 0x5f, 0x44, 0x4f, 0x57, + 0x4e, 0x5f, 0x49, 0x4e, 0x5f, 0x4d, 0x41, 0x50, 0x10, 0x09, 0x12, 0x1d, 0x0a, 0x19, 0x43, 0x49, + 0x43, 0x5f, 0x45, 0x4e, 0x47, 0x49, 0x4e, 0x45, 0x5f, 0x48, 0x41, 0x53, 0x5f, 0x4e, 0x4f, 0x5f, + 0x53, 0x54, 0x4f, 0x52, 0x41, 0x47, 0x45, 0x10, 0x0a, 0x12, 0x1b, 0x0a, 0x17, 0x43, 0x49, 0x43, + 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x58, 0x49, 0x53, 0x54, 0x5f, 0x4f, + 0x4e, 0x5f, 0x50, 0x53, 0x10, 0x0b, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x49, 0x43, 0x5f, 0x43, 0x4f, + 0x4e, 0x54, 0x5f, 0x42, 0x41, 0x44, 0x5f, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x10, 0x0c, 0x12, 0x15, + 0x0a, 
0x11, 0x43, 0x49, 0x43, 0x5f, 0x44, 0x54, 0x58, 0x5f, 0x43, 0x4f, 0x52, 0x52, 0x55, 0x50, + 0x54, 0x45, 0x44, 0x10, 0x0d, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x49, 0x43, 0x5f, 0x44, 0x54, 0x58, + 0x5f, 0x4f, 0x52, 0x50, 0x48, 0x41, 0x4e, 0x10, 0x0e, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x49, 0x43, + 0x5f, 0x43, 0x53, 0x55, 0x4d, 0x5f, 0x4c, 0x4f, 0x53, 0x54, 0x10, 0x0f, 0x12, 0x14, 0x0a, 0x10, + 0x43, 0x49, 0x43, 0x5f, 0x43, 0x53, 0x55, 0x4d, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x55, 0x52, 0x45, + 0x10, 0x10, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x49, 0x43, 0x5f, 0x4f, 0x42, 0x4a, 0x5f, 0x4c, 0x4f, + 0x53, 0x54, 0x5f, 0x52, 0x45, 0x50, 0x10, 0x11, 0x12, 0x19, 0x0a, 0x15, 0x43, 0x49, 0x43, 0x5f, + 0x4f, 0x42, 0x4a, 0x5f, 0x4c, 0x4f, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x53, 0x48, 0x41, 0x52, + 0x44, 0x10, 0x12, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x49, 0x43, 0x5f, 0x4f, 0x42, 0x4a, 0x5f, 0x4c, + 0x4f, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x10, 0x13, 0x12, 0x1a, 0x0a, + 0x16, 0x43, 0x49, 0x43, 0x5f, 0x4f, 0x42, 0x4a, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x49, 0x4e, + 0x43, 0x4f, 0x4e, 0x53, 0x49, 0x53, 0x54, 0x10, 0x14, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x43, + 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x64, 0x2a, 0x97, 0x02, 0x0a, 0x14, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x41, 0x5f, 0x44, 0x45, 0x46, 0x41, 0x55, + 0x4c, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x49, 0x41, 0x5f, 0x49, 0x4e, 0x54, 0x45, + 0x52, 0x41, 0x43, 0x54, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x41, 0x5f, 0x49, 0x47, + 0x4e, 0x4f, 0x52, 0x45, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x41, 0x5f, 0x44, 0x49, + 0x53, 0x43, 0x41, 0x52, 0x44, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x49, 0x41, 0x5f, 0x52, + 0x45, 0x41, 0x44, 0x44, 0x10, 0x04, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, + 0x55, 0x53, 0x54, 0x5f, 0x4d, 0x53, 0x10, 
0x05, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x49, 0x41, 0x5f, + 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x50, 0x53, 0x10, 0x06, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x49, + 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x54, 0x41, 0x52, 0x47, 0x45, 0x54, 0x10, 0x07, + 0x12, 0x16, 0x0a, 0x12, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x4d, 0x41, + 0x4a, 0x4f, 0x52, 0x49, 0x54, 0x59, 0x10, 0x08, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x49, 0x41, 0x5f, + 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x4c, 0x41, 0x54, 0x45, 0x53, 0x54, 0x10, 0x09, 0x12, 0x14, + 0x0a, 0x10, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x4f, 0x4c, 0x44, 0x45, + 0x53, 0x54, 0x10, 0x0a, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, + 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x50, 0x41, 0x52, 0x49, 0x54, 0x59, 0x10, 0x0b, 0x12, 0x15, 0x0a, + 0x11, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x44, 0x41, + 0x54, 0x41, 0x10, 0x0c, 0x2a, 0x89, 0x01, 0x0a, 0x09, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x46, 0x6c, + 0x61, 0x67, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, + 0x0d, 0x0a, 0x09, 0x43, 0x46, 0x5f, 0x44, 0x52, 0x59, 0x52, 0x55, 0x4e, 0x10, 0x01, 0x12, 0x0c, + 0x0a, 0x08, 0x43, 0x46, 0x5f, 0x52, 0x45, 0x53, 0x45, 0x54, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, + 0x43, 0x46, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, + 0x43, 0x46, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, 0x08, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x46, 0x5f, + 0x4f, 0x52, 0x50, 0x48, 0x41, 0x4e, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x10, 0x10, 0x12, 0x11, 0x0a, + 0x0d, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, 0x20, + 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, 0x40, + 0x2a, 0x88, 0x01, 0x0a, 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x73, 0x74, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x49, 0x53, 0x5f, 0x49, 
0x4e, 0x49, 0x54, + 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, + 0x47, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x49, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, + 0x45, 0x54, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x53, 0x54, + 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, 0x46, + 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, 0x50, + 0x41, 0x55, 0x53, 0x45, 0x44, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x49, 0x53, 0x5f, 0x49, + 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x06, 0x2a, 0x9d, 0x01, 0x0a, 0x0f, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x11, 0x0a, 0x0d, 0x43, 0x50, 0x53, 0x5f, 0x55, 0x4e, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x49, + 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, 0x43, + 0x4b, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x41, 0x55, + 0x53, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x45, 0x4e, + 0x44, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x53, 0x54, + 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x50, 0x53, 0x5f, 0x49, + 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x07, 0x2a, 0xe0, 0x01, 0x0a, 0x0e, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x63, 0x61, 0x6e, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0f, + 0x0a, 0x0b, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x45, 0x10, 0x00, 0x12, + 0x11, 0x0a, 0x0d, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4c, 0x49, 0x53, 0x54, + 0x10, 0x01, 0x12, 
0x10, 0x0a, 0x0c, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4d, + 0x42, 0x53, 0x10, 0x02, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, + 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, 0x55, 0x50, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, + 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x04, 0x12, 0x14, 0x0a, + 0x10, 0x43, 0x53, 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, 0x55, + 0x50, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x54, 0x58, 0x5f, 0x52, + 0x45, 0x53, 0x59, 0x4e, 0x43, 0x10, 0x06, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, 0x50, 0x5f, 0x4f, + 0x42, 0x4a, 0x5f, 0x53, 0x43, 0x52, 0x55, 0x42, 0x10, 0x07, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x53, + 0x50, 0x5f, 0x52, 0x45, 0x42, 0x55, 0x49, 0x4c, 0x44, 0x10, 0x08, 0x12, 0x13, 0x0a, 0x0f, 0x43, + 0x53, 0x50, 0x5f, 0x41, 0x47, 0x47, 0x52, 0x45, 0x47, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x09, + 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x0a, 0x42, 0x39, + 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, + 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, + 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x68, 0x6b, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, +} + +var ( + file_chk_chk_proto_rawDescOnce sync.Once + file_chk_chk_proto_rawDescData = file_chk_chk_proto_rawDesc +) + +func file_chk_chk_proto_rawDescGZIP() []byte { + file_chk_chk_proto_rawDescOnce.Do(func() { + file_chk_chk_proto_rawDescData = protoimpl.X.CompressGZIP(file_chk_chk_proto_rawDescData) + }) + return file_chk_chk_proto_rawDescData +} + +var file_chk_chk_proto_enumTypes = make([]protoimpl.EnumInfo, 6) +var file_chk_chk_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_chk_chk_proto_goTypes = []interface{}{ + 
(CheckInconsistClass)(0), // 0: chk.CheckInconsistClass + (CheckInconsistAction)(0), // 1: chk.CheckInconsistAction + (CheckFlag)(0), // 2: chk.CheckFlag + (CheckInstStatus)(0), // 3: chk.CheckInstStatus + (CheckPoolStatus)(0), // 4: chk.CheckPoolStatus + (CheckScanPhase)(0), // 5: chk.CheckScanPhase + (*CheckReport)(nil), // 6: chk.CheckReport +} +var file_chk_chk_proto_depIdxs = []int32{ + 0, // 0: chk.CheckReport.class:type_name -> chk.CheckInconsistClass + 1, // 1: chk.CheckReport.action:type_name -> chk.CheckInconsistAction + 1, // 2: chk.CheckReport.act_choices:type_name -> chk.CheckInconsistAction + 3, // [3:3] is the sub-list for method output_type + 3, // [3:3] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name +} + +func init() { file_chk_chk_proto_init() } +func file_chk_chk_proto_init() { + if File_chk_chk_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_chk_chk_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckReport); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_chk_chk_proto_rawDesc, + NumEnums: 6, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_chk_chk_proto_goTypes, + DependencyIndexes: file_chk_chk_proto_depIdxs, + EnumInfos: file_chk_chk_proto_enumTypes, + MessageInfos: file_chk_chk_proto_msgTypes, + }.Build() + File_chk_chk_proto = out.File + file_chk_chk_proto_rawDesc = nil + file_chk_chk_proto_goTypes = nil + file_chk_chk_proto_depIdxs = nil +} diff --git a/src/control/common/proto/chk/faults.pb.go b/src/control/common/proto/chk/faults.pb.go new file mode 100644 index 
00000000000..d18df89b7ab --- /dev/null +++ b/src/control/common/proto/chk/faults.pb.go @@ -0,0 +1,184 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.30.0 +// protoc v3.5.0 +// source: chk/faults.proto + +package chk + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Fault struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Class CheckInconsistClass `protobuf:"varint,1,opt,name=class,proto3,enum=chk.CheckInconsistClass" json:"class,omitempty"` + Strings []string `protobuf:"bytes,2,rep,name=strings,proto3" json:"strings,omitempty"` + Uints []uint32 `protobuf:"varint,3,rep,packed,name=uints,proto3" json:"uints,omitempty"` + Ints []int32 `protobuf:"varint,4,rep,packed,name=ints,proto3" json:"ints,omitempty"` +} + +func (x *Fault) Reset() { + *x = Fault{} + if protoimpl.UnsafeEnabled { + mi := &file_chk_faults_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Fault) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Fault) ProtoMessage() {} + +func (x *Fault) ProtoReflect() protoreflect.Message { + mi := &file_chk_faults_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Fault.ProtoReflect.Descriptor 
instead. +func (*Fault) Descriptor() ([]byte, []int) { + return file_chk_faults_proto_rawDescGZIP(), []int{0} +} + +func (x *Fault) GetClass() CheckInconsistClass { + if x != nil { + return x.Class + } + return CheckInconsistClass_CIC_NONE +} + +func (x *Fault) GetStrings() []string { + if x != nil { + return x.Strings + } + return nil +} + +func (x *Fault) GetUints() []uint32 { + if x != nil { + return x.Uints + } + return nil +} + +func (x *Fault) GetInts() []int32 { + if x != nil { + return x.Ints + } + return nil +} + +var File_chk_faults_proto protoreflect.FileDescriptor + +var file_chk_faults_proto_rawDesc = []byte{ + 0x0a, 0x10, 0x63, 0x68, 0x6b, 0x2f, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x03, 0x63, 0x68, 0x6b, 0x1a, 0x0d, 0x63, 0x68, 0x6b, 0x2f, 0x63, 0x68, 0x6b, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x7b, 0x0a, 0x05, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x12, + 0x2e, 0x0a, 0x05, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, + 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, + 0x69, 0x73, 0x74, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x52, 0x05, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x12, + 0x18, 0x0a, 0x07, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x07, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x75, 0x69, 0x6e, + 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, 0x75, 0x69, 0x6e, 0x74, 0x73, 0x12, + 0x12, 0x0a, 0x04, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x05, 0x52, 0x04, 0x69, + 0x6e, 0x74, 0x73, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, + 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, + 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x68, 0x6b, 0x62, 
0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_chk_faults_proto_rawDescOnce sync.Once + file_chk_faults_proto_rawDescData = file_chk_faults_proto_rawDesc +) + +func file_chk_faults_proto_rawDescGZIP() []byte { + file_chk_faults_proto_rawDescOnce.Do(func() { + file_chk_faults_proto_rawDescData = protoimpl.X.CompressGZIP(file_chk_faults_proto_rawDescData) + }) + return file_chk_faults_proto_rawDescData +} + +var file_chk_faults_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_chk_faults_proto_goTypes = []interface{}{ + (*Fault)(nil), // 0: chk.Fault + (CheckInconsistClass)(0), // 1: chk.CheckInconsistClass +} +var file_chk_faults_proto_depIdxs = []int32{ + 1, // 0: chk.Fault.class:type_name -> chk.CheckInconsistClass + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_chk_faults_proto_init() } +func file_chk_faults_proto_init() { + if File_chk_faults_proto != nil { + return + } + file_chk_chk_proto_init() + if !protoimpl.UnsafeEnabled { + file_chk_faults_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Fault); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_chk_faults_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_chk_faults_proto_goTypes, + DependencyIndexes: file_chk_faults_proto_depIdxs, + MessageInfos: file_chk_faults_proto_msgTypes, + }.Build() + File_chk_faults_proto = out.File + file_chk_faults_proto_rawDesc = nil + file_chk_faults_proto_goTypes = nil + 
file_chk_faults_proto_depIdxs = nil +} diff --git a/src/control/common/proto/ctl/ranks.pb.go b/src/control/common/proto/ctl/ranks.pb.go index 905e24a5424..4e08382c944 100644 --- a/src/control/common/proto/ctl/ranks.pb.go +++ b/src/control/common/proto/ctl/ranks.pb.go @@ -1,12 +1,12 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.30.0 // protoc v3.5.0 // source: ctl/ranks.proto @@ -34,8 +34,9 @@ type RanksReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Force bool `protobuf:"varint,3,opt,name=force,proto3" json:"force,omitempty"` // force operation - Ranks string `protobuf:"bytes,4,opt,name=ranks,proto3" json:"ranks,omitempty"` // rankset to operate over + Force bool `protobuf:"varint,3,opt,name=force,proto3" json:"force,omitempty"` // force operation + Ranks string `protobuf:"bytes,4,opt,name=ranks,proto3" json:"ranks,omitempty"` // rankset to operate over + CheckMode bool `protobuf:"varint,5,opt,name=check_mode,json=checkMode,proto3" json:"check_mode,omitempty"` // start in check mode } func (x *RanksReq) Reset() { @@ -84,6 +85,13 @@ func (x *RanksReq) GetRanks() string { return "" } +func (x *RanksReq) GetCheckMode() bool { + if x != nil { + return x.CheckMode + } + return false +} + // Generic response containing DER result from multiple ranks. // Used in gRPC fanout to operate on hosts with multiple ranks. 
type RanksResp struct { @@ -138,18 +146,20 @@ var File_ctl_ranks_proto protoreflect.FileDescriptor var file_ctl_ranks_proto_rawDesc = []byte{ 0x0a, 0x0f, 0x63, 0x74, 0x6c, 0x2f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x03, 0x63, 0x74, 0x6c, 0x1a, 0x12, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x36, 0x0a, 0x08, 0x52, 0x61, + 0x61, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x55, 0x0a, 0x08, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x22, 0x39, 0x0a, 0x09, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x42, 0x39, 0x5a, - 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, - 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x74, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x5f, 0x6d, 0x6f, 0x64, 0x65, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x4d, 0x6f, 0x64, + 0x65, 0x22, 0x39, 0x0a, 0x09, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, + 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 
0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x42, 0x39, 0x5a, 0x37, + 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, + 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, + 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x74, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/mgmt/check.pb.go b/src/control/common/proto/mgmt/check.pb.go new file mode 100644 index 00000000000..b54335501d4 --- /dev/null +++ b/src/control/common/proto/mgmt/check.pb.go @@ -0,0 +1,1839 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.30.0 +// protoc v3.5.0 +// source: mgmt/check.proto + +package mgmt + +import ( + chk "github.com/daos-stack/daos/src/control/common/proto/chk" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// The pairs for kinds of inconsistency and related repair action. The control plane need to +// generate such policy array from some configuration file either via command line option or +// some default location, such as /etc/daos/daos_check.yml. Such policy arrge will be passed +// to DAOS engine when start check and cannot changed during check scanning, but can be list +// via 'dmg check prop' - see CheckPropResp. 
+type CheckInconsistPolicy struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + InconsistCas chk.CheckInconsistClass `protobuf:"varint,1,opt,name=inconsist_cas,json=inconsistCas,proto3,enum=chk.CheckInconsistClass" json:"inconsist_cas,omitempty"` // See CheckInconsistClass. + InconsistAct chk.CheckInconsistAction `protobuf:"varint,2,opt,name=inconsist_act,json=inconsistAct,proto3,enum=chk.CheckInconsistAction" json:"inconsist_act,omitempty"` // See CheckInconsistAction. +} + +func (x *CheckInconsistPolicy) Reset() { + *x = CheckInconsistPolicy{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckInconsistPolicy) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckInconsistPolicy) ProtoMessage() {} + +func (x *CheckInconsistPolicy) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckInconsistPolicy.ProtoReflect.Descriptor instead. 
+func (*CheckInconsistPolicy) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{0} +} + +func (x *CheckInconsistPolicy) GetInconsistCas() chk.CheckInconsistClass { + if x != nil { + return x.InconsistCas + } + return chk.CheckInconsistClass(0) +} + +func (x *CheckInconsistPolicy) GetInconsistAct() chk.CheckInconsistAction { + if x != nil { + return x.InconsistAct + } + return chk.CheckInconsistAction(0) +} + +type CheckEnableReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` +} + +func (x *CheckEnableReq) Reset() { + *x = CheckEnableReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckEnableReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckEnableReq) ProtoMessage() {} + +func (x *CheckEnableReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckEnableReq.ProtoReflect.Descriptor instead. 
+func (*CheckEnableReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{1} +} + +func (x *CheckEnableReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +type CheckDisableReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` +} + +func (x *CheckDisableReq) Reset() { + *x = CheckDisableReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckDisableReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckDisableReq) ProtoMessage() {} + +func (x *CheckDisableReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckDisableReq.ProtoReflect.Descriptor instead. +func (*CheckDisableReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{2} +} + +func (x *CheckDisableReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +// For 'dmg check start'. +type CheckStartReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. + Flags uint32 `protobuf:"varint,2,opt,name=flags,proto3" json:"flags,omitempty"` // See CheckFlag. + // The list of ranks to start DAOS check. Cannot be empty. + // + // The control plane will generate the ranks list and guarantee that any rank in the system + // is either will participate in check or has been excluded. 
Otherwise, partial ranks check + // may cause some unexpected and unrecoverable result unless the specified pool(s) does not + // exist on those missed rank(s). + Ranks []uint32 `protobuf:"varint,3,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` + // UUID for the pools for which to start DAOS check. + // If empty, then start DAOS check for all pools in the system. + Uuids []string `protobuf:"bytes,4,rep,name=uuids,proto3" json:"uuids,omitempty"` + Policies []*CheckInconsistPolicy `protobuf:"bytes,5,rep,name=policies,proto3" json:"policies,omitempty"` // Policy array for handling inconsistency. +} + +func (x *CheckStartReq) Reset() { + *x = CheckStartReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckStartReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckStartReq) ProtoMessage() {} + +func (x *CheckStartReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckStartReq.ProtoReflect.Descriptor instead. 
+func (*CheckStartReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{3} +} + +func (x *CheckStartReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckStartReq) GetFlags() uint32 { + if x != nil { + return x.Flags + } + return 0 +} + +func (x *CheckStartReq) GetRanks() []uint32 { + if x != nil { + return x.Ranks + } + return nil +} + +func (x *CheckStartReq) GetUuids() []string { + if x != nil { + return x.Uuids + } + return nil +} + +func (x *CheckStartReq) GetPolicies() []*CheckInconsistPolicy { + if x != nil { + return x.Policies + } + return nil +} + +// CheckStartResp returns the result of check start. +type CheckStartResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. +} + +func (x *CheckStartResp) Reset() { + *x = CheckStartResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckStartResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckStartResp) ProtoMessage() {} + +func (x *CheckStartResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckStartResp.ProtoReflect.Descriptor instead. +func (*CheckStartResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{4} +} + +func (x *CheckStartResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +// For 'dmg check stop'. 
+type CheckStopReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. + // UUID for the pools for which to stop DAOS check. + // If empty, then stop check for all pools in the system. + Uuids []string `protobuf:"bytes,2,rep,name=uuids,proto3" json:"uuids,omitempty"` +} + +func (x *CheckStopReq) Reset() { + *x = CheckStopReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckStopReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckStopReq) ProtoMessage() {} + +func (x *CheckStopReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckStopReq.ProtoReflect.Descriptor instead. +func (*CheckStopReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{5} +} + +func (x *CheckStopReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckStopReq) GetUuids() []string { + if x != nil { + return x.Uuids + } + return nil +} + +// CheckStopResp returns the result of check stop. +type CheckStopResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. 
+} + +func (x *CheckStopResp) Reset() { + *x = CheckStopResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckStopResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckStopResp) ProtoMessage() {} + +func (x *CheckStopResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckStopResp.ProtoReflect.Descriptor instead. +func (*CheckStopResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{6} +} + +func (x *CheckStopResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +// For 'dmg check query'. +type CheckQueryReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. + // UUID for the pools for which to query DAOS check. + // If empty, then query DAOS check for all pools in the system. 
+ Uuids []string `protobuf:"bytes,2,rep,name=uuids,proto3" json:"uuids,omitempty"` + Shallow bool `protobuf:"varint,3,opt,name=shallow,proto3" json:"shallow,omitempty"` // shallow query (findings only) + Seqs []uint64 `protobuf:"varint,4,rep,packed,name=seqs,proto3" json:"seqs,omitempty"` // return findings with these sequences (implies shallow) +} + +func (x *CheckQueryReq) Reset() { + *x = CheckQueryReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryReq) ProtoMessage() {} + +func (x *CheckQueryReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryReq.ProtoReflect.Descriptor instead. +func (*CheckQueryReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{7} +} + +func (x *CheckQueryReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckQueryReq) GetUuids() []string { + if x != nil { + return x.Uuids + } + return nil +} + +func (x *CheckQueryReq) GetShallow() bool { + if x != nil { + return x.Shallow + } + return false +} + +func (x *CheckQueryReq) GetSeqs() []uint64 { + if x != nil { + return x.Seqs + } + return nil +} + +// Time information on related component: system, pool or target. +type CheckQueryTime struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + StartTime uint64 `protobuf:"varint,1,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` // The time of check instance being started on the component. 
+ // If the check instance is still running on the component, then it is the estimated + // remaining time to complete the check on the component. Otherwise, it is the time + // of the check instance completed, failed or stopped on the component. + MiscTime uint64 `protobuf:"varint,2,opt,name=misc_time,json=miscTime,proto3" json:"misc_time,omitempty"` +} + +func (x *CheckQueryTime) Reset() { + *x = CheckQueryTime{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryTime) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryTime) ProtoMessage() {} + +func (x *CheckQueryTime) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryTime.ProtoReflect.Descriptor instead. +func (*CheckQueryTime) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{8} +} + +func (x *CheckQueryTime) GetStartTime() uint64 { + if x != nil { + return x.StartTime + } + return 0 +} + +func (x *CheckQueryTime) GetMiscTime() uint64 { + if x != nil { + return x.MiscTime + } + return 0 +} + +// Inconsistency statistics on related component: system, pool or target. +type CheckQueryInconsist struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Total uint32 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"` // The count of total found inconsistency on the component. + Repaired uint32 `protobuf:"varint,2,opt,name=repaired,proto3" json:"repaired,omitempty"` // The count of repaired inconsistency on the component. 
+ Ignored uint32 `protobuf:"varint,3,opt,name=ignored,proto3" json:"ignored,omitempty"` // The count of ignored inconsistency on the component. + Failed uint32 `protobuf:"varint,4,opt,name=failed,proto3" json:"failed,omitempty"` // The count of fail to repaired inconsistency on the component. +} + +func (x *CheckQueryInconsist) Reset() { + *x = CheckQueryInconsist{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryInconsist) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryInconsist) ProtoMessage() {} + +func (x *CheckQueryInconsist) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryInconsist.ProtoReflect.Descriptor instead. +func (*CheckQueryInconsist) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{9} +} + +func (x *CheckQueryInconsist) GetTotal() uint32 { + if x != nil { + return x.Total + } + return 0 +} + +func (x *CheckQueryInconsist) GetRepaired() uint32 { + if x != nil { + return x.Repaired + } + return 0 +} + +func (x *CheckQueryInconsist) GetIgnored() uint32 { + if x != nil { + return x.Ignored + } + return 0 +} + +func (x *CheckQueryInconsist) GetFailed() uint32 { + if x != nil { + return x.Failed + } + return 0 +} + +// Check query result for the pool shard on the target. +type CheckQueryTarget struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` // Rank ID. 
+ Target uint32 `protobuf:"varint,2,opt,name=target,proto3" json:"target,omitempty"` // Target index in the rank. + Status chk.CheckInstStatus `protobuf:"varint,3,opt,name=status,proto3,enum=chk.CheckInstStatus" json:"status,omitempty"` // Check instance status on this target - see CheckInstStatus. + // Inconsistency statistics during the phases range + // [CSP_DTX_RESYNC, CSP_AGGREGATION] for the pool shard on the target. + Inconsistency *CheckQueryInconsist `protobuf:"bytes,4,opt,name=inconsistency,proto3" json:"inconsistency,omitempty"` + // Time information for the pool shard on the target if applicable. + Time *CheckQueryTime `protobuf:"bytes,5,opt,name=time,proto3" json:"time,omitempty"` +} + +func (x *CheckQueryTarget) Reset() { + *x = CheckQueryTarget{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryTarget) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryTarget) ProtoMessage() {} + +func (x *CheckQueryTarget) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryTarget.ProtoReflect.Descriptor instead. 
+func (*CheckQueryTarget) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{10} +} + +func (x *CheckQueryTarget) GetRank() uint32 { + if x != nil { + return x.Rank + } + return 0 +} + +func (x *CheckQueryTarget) GetTarget() uint32 { + if x != nil { + return x.Target + } + return 0 +} + +func (x *CheckQueryTarget) GetStatus() chk.CheckInstStatus { + if x != nil { + return x.Status + } + return chk.CheckInstStatus(0) +} + +func (x *CheckQueryTarget) GetInconsistency() *CheckQueryInconsist { + if x != nil { + return x.Inconsistency + } + return nil +} + +func (x *CheckQueryTarget) GetTime() *CheckQueryTime { + if x != nil { + return x.Time + } + return nil +} + +// Check query result for the pool. +type CheckQueryPool struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Uuid string `protobuf:"bytes,1,opt,name=uuid,proto3" json:"uuid,omitempty"` // Pool UUID. + Status chk.CheckPoolStatus `protobuf:"varint,2,opt,name=status,proto3,enum=chk.CheckPoolStatus" json:"status,omitempty"` // Pool status - see CheckPoolStatus. + Phase chk.CheckScanPhase `protobuf:"varint,3,opt,name=phase,proto3,enum=chk.CheckScanPhase" json:"phase,omitempty"` // Scan phase - see CheckScanPhase. + // Inconsistency statistics during the phases range + // [CSP_POOL_MBS, CSP_CONT_CLEANUP] for the pool. + Inconsistency *CheckQueryInconsist `protobuf:"bytes,4,opt,name=inconsistency,proto3" json:"inconsistency,omitempty"` + // Time information for the pool if applicable. + Time *CheckQueryTime `protobuf:"bytes,5,opt,name=time,proto3" json:"time,omitempty"` + // Per target based query result for the phases since CSP_DTX_RESYNC. 
+ Targets []*CheckQueryTarget `protobuf:"bytes,6,rep,name=targets,proto3" json:"targets,omitempty"` +} + +func (x *CheckQueryPool) Reset() { + *x = CheckQueryPool{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryPool) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryPool) ProtoMessage() {} + +func (x *CheckQueryPool) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryPool.ProtoReflect.Descriptor instead. +func (*CheckQueryPool) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{11} +} + +func (x *CheckQueryPool) GetUuid() string { + if x != nil { + return x.Uuid + } + return "" +} + +func (x *CheckQueryPool) GetStatus() chk.CheckPoolStatus { + if x != nil { + return x.Status + } + return chk.CheckPoolStatus(0) +} + +func (x *CheckQueryPool) GetPhase() chk.CheckScanPhase { + if x != nil { + return x.Phase + } + return chk.CheckScanPhase(0) +} + +func (x *CheckQueryPool) GetInconsistency() *CheckQueryInconsist { + if x != nil { + return x.Inconsistency + } + return nil +} + +func (x *CheckQueryPool) GetTime() *CheckQueryTime { + if x != nil { + return x.Time + } + return nil +} + +func (x *CheckQueryPool) GetTargets() []*CheckQueryTarget { + if x != nil { + return x.Targets + } + return nil +} + +// CheckQueryResp returns DAOS check status for required pool(s) or the whole system. +// Depend on the dmg command line option, the control plane needs to reorganize the query +// results with summary (of pool shards from targets) and different detailed information. 
+type CheckQueryResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ReqStatus int32 `protobuf:"varint,1,opt,name=req_status,json=reqStatus,proto3" json:"req_status,omitempty"` // DAOS error code. + // The whole check instance status depends on the each engine status: + // As long as one target is in CIS_RUNNING, then the instance is CIS_RUNNING. + // Otherwise, in turn with the status of CIS_FAILED, CIS_STOPPED, CIS_IMPLICATED, + // CIS_COMPLETED, CIS_PAUSED, CIS_INIT. + InsStatus chk.CheckInstStatus `protobuf:"varint,2,opt,name=ins_status,json=insStatus,proto3,enum=chk.CheckInstStatus" json:"ins_status,omitempty"` + // Scan phase - see CheckScanPhase. Before moving to CSP_POOL_MBS, the check + // instance status is maintained on the check leader. And then multiple pools + // can be processed in parallel, so the instance phase for different pools may + // be different, see CheckQueryPool::phase. + InsPhase chk.CheckScanPhase `protobuf:"varint,3,opt,name=ins_phase,json=insPhase,proto3,enum=chk.CheckScanPhase" json:"ins_phase,omitempty"` + // Inconsistency statistics during the phases range + // [CSP_PREPARE, CSP_POOL_LIST] for the whole system. + Inconsistency *CheckQueryInconsist `protobuf:"bytes,4,opt,name=inconsistency,proto3" json:"inconsistency,omitempty"` + // Time information for the whole system if applicable. + Time *CheckQueryTime `protobuf:"bytes,5,opt,name=time,proto3" json:"time,omitempty"` + // Per pool based query result for the phases since CSP_POOL_MBS. 
+ Pools []*CheckQueryPool `protobuf:"bytes,6,rep,name=pools,proto3" json:"pools,omitempty"` + // Inconsistency reports to be displayed + Reports []*chk.CheckReport `protobuf:"bytes,7,rep,name=reports,proto3" json:"reports,omitempty"` +} + +func (x *CheckQueryResp) Reset() { + *x = CheckQueryResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckQueryResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckQueryResp) ProtoMessage() {} + +func (x *CheckQueryResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckQueryResp.ProtoReflect.Descriptor instead. +func (*CheckQueryResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{12} +} + +func (x *CheckQueryResp) GetReqStatus() int32 { + if x != nil { + return x.ReqStatus + } + return 0 +} + +func (x *CheckQueryResp) GetInsStatus() chk.CheckInstStatus { + if x != nil { + return x.InsStatus + } + return chk.CheckInstStatus(0) +} + +func (x *CheckQueryResp) GetInsPhase() chk.CheckScanPhase { + if x != nil { + return x.InsPhase + } + return chk.CheckScanPhase(0) +} + +func (x *CheckQueryResp) GetInconsistency() *CheckQueryInconsist { + if x != nil { + return x.Inconsistency + } + return nil +} + +func (x *CheckQueryResp) GetTime() *CheckQueryTime { + if x != nil { + return x.Time + } + return nil +} + +func (x *CheckQueryResp) GetPools() []*CheckQueryPool { + if x != nil { + return x.Pools + } + return nil +} + +func (x *CheckQueryResp) GetReports() []*chk.CheckReport { + if x != nil { + return x.Reports + } + return nil +} + +// For 'dmg check set-policy' +type 
CheckSetPolicyReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. + Flags uint32 `protobuf:"varint,2,opt,name=flags,proto3" json:"flags,omitempty"` // The flags when start check - see CheckFlag. + Policies []*CheckInconsistPolicy `protobuf:"bytes,3,rep,name=policies,proto3" json:"policies,omitempty"` // Inconsistency policy array. +} + +func (x *CheckSetPolicyReq) Reset() { + *x = CheckSetPolicyReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckSetPolicyReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckSetPolicyReq) ProtoMessage() {} + +func (x *CheckSetPolicyReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckSetPolicyReq.ProtoReflect.Descriptor instead. 
+func (*CheckSetPolicyReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{13} +} + +func (x *CheckSetPolicyReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckSetPolicyReq) GetFlags() uint32 { + if x != nil { + return x.Flags + } + return 0 +} + +func (x *CheckSetPolicyReq) GetPolicies() []*CheckInconsistPolicy { + if x != nil { + return x.Policies + } + return nil +} + +// To allow daos_server to query check leader properties +type CheckPropReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. +} + +func (x *CheckPropReq) Reset() { + *x = CheckPropReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckPropReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckPropReq) ProtoMessage() {} + +func (x *CheckPropReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckPropReq.ProtoReflect.Descriptor instead. +func (*CheckPropReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{14} +} + +func (x *CheckPropReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +// CheckPropResp returns the result of check prop and the properties when start check. 
+type CheckPropResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. + Flags uint32 `protobuf:"varint,2,opt,name=flags,proto3" json:"flags,omitempty"` // The flags when start check - see CheckFlag. + Policies []*CheckInconsistPolicy `protobuf:"bytes,3,rep,name=policies,proto3" json:"policies,omitempty"` // Inconsistency policy array. +} + +func (x *CheckPropResp) Reset() { + *x = CheckPropResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckPropResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckPropResp) ProtoMessage() {} + +func (x *CheckPropResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[15] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckPropResp.ProtoReflect.Descriptor instead. +func (*CheckPropResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{15} +} + +func (x *CheckPropResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +func (x *CheckPropResp) GetFlags() uint32 { + if x != nil { + return x.Flags + } + return 0 +} + +func (x *CheckPropResp) GetPolicies() []*CheckInconsistPolicy { + if x != nil { + return x.Policies + } + return nil +} + +// For 'dmg check get-policy' +type CheckGetPolicyReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. 
+ Classes []chk.CheckInconsistClass `protobuf:"varint,2,rep,packed,name=classes,proto3,enum=chk.CheckInconsistClass" json:"classes,omitempty"` + LastUsed bool `protobuf:"varint,3,opt,name=last_used,json=lastUsed,proto3" json:"last_used,omitempty"` +} + +func (x *CheckGetPolicyReq) Reset() { + *x = CheckGetPolicyReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckGetPolicyReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckGetPolicyReq) ProtoMessage() {} + +func (x *CheckGetPolicyReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[16] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckGetPolicyReq.ProtoReflect.Descriptor instead. +func (*CheckGetPolicyReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{16} +} + +func (x *CheckGetPolicyReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckGetPolicyReq) GetClasses() []chk.CheckInconsistClass { + if x != nil { + return x.Classes + } + return nil +} + +func (x *CheckGetPolicyReq) GetLastUsed() bool { + if x != nil { + return x.LastUsed + } + return false +} + +// CheckGetPolicyResp returns the result of check prop and the properties when start check. +// NB: Dupe of CheckPropResp currently; may consolidate if they don't diverge. +type CheckGetPolicyResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. 
+ Flags uint32 `protobuf:"varint,2,opt,name=flags,proto3" json:"flags,omitempty"` // The flags when start check - see CheckFlag. + Policies []*CheckInconsistPolicy `protobuf:"bytes,3,rep,name=policies,proto3" json:"policies,omitempty"` // Inconsistency policy array. +} + +func (x *CheckGetPolicyResp) Reset() { + *x = CheckGetPolicyResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckGetPolicyResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckGetPolicyResp) ProtoMessage() {} + +func (x *CheckGetPolicyResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[17] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckGetPolicyResp.ProtoReflect.Descriptor instead. +func (*CheckGetPolicyResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{17} +} + +func (x *CheckGetPolicyResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +func (x *CheckGetPolicyResp) GetFlags() uint32 { + if x != nil { + return x.Flags + } + return 0 +} + +func (x *CheckGetPolicyResp) GetPolicies() []*CheckInconsistPolicy { + if x != nil { + return x.Policies + } + return nil +} + +// For the admin's decision from DAOS check interaction. +type CheckActReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier. + // DAOS RAS event sequence - see RASEvent::extended_info::check_info::chk_inconsist_seq. 
+ Seq uint64 `protobuf:"varint,2,opt,name=seq,proto3" json:"seq,omitempty"` + // The decision from RASEvent::extended_info::check_info::chk_opts. + Act chk.CheckInconsistAction `protobuf:"varint,3,opt,name=act,proto3,enum=chk.CheckInconsistAction" json:"act,omitempty"` + ForAll bool `protobuf:"varint,4,opt,name=for_all,json=forAll,proto3" json:"for_all,omitempty"` // The same action is applicable to the same type of inconsistency. +} + +func (x *CheckActReq) Reset() { + *x = CheckActReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckActReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckActReq) ProtoMessage() {} + +func (x *CheckActReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[18] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckActReq.ProtoReflect.Descriptor instead. +func (*CheckActReq) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{18} +} + +func (x *CheckActReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *CheckActReq) GetSeq() uint64 { + if x != nil { + return x.Seq + } + return 0 +} + +func (x *CheckActReq) GetAct() chk.CheckInconsistAction { + if x != nil { + return x.Act + } + return chk.CheckInconsistAction(0) +} + +func (x *CheckActReq) GetForAll() bool { + if x != nil { + return x.ForAll + } + return false +} + +// CheckActResp returns the result of executing admin's decision. 
+type CheckActResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. +} + +func (x *CheckActResp) Reset() { + *x = CheckActResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_check_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckActResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckActResp) ProtoMessage() {} + +func (x *CheckActResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_check_proto_msgTypes[19] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckActResp.ProtoReflect.Descriptor instead. +func (*CheckActResp) Descriptor() ([]byte, []int) { + return file_mgmt_check_proto_rawDescGZIP(), []int{19} +} + +func (x *CheckActResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +var File_mgmt_check_proto protoreflect.FileDescriptor + +var file_mgmt_check_proto_rawDesc = []byte{ + 0x0a, 0x10, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x1a, 0x0d, 0x63, 0x68, 0x6b, 0x2f, 0x63, 0x68, + 0x6b, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x95, 0x01, 0x0a, 0x14, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, + 0x12, 0x3d, 0x0a, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x5f, 0x63, 0x61, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x43, 0x6c, 0x61, 0x73, + 0x73, 0x52, 0x0c, 
0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x43, 0x61, 0x73, 0x12, + 0x3e, 0x0a, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x5f, 0x61, 0x63, 0x74, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x52, 0x0c, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, 0x22, + 0x22, 0x0a, 0x0e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x22, 0x23, 0x0a, 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x44, 0x69, 0x73, 0x61, + 0x62, 0x6c, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x0d, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x66, 0x6c, 0x61, 0x67, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x66, 0x6c, 0x61, + 0x67, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x75, 0x75, 0x69, 0x64, + 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x75, 0x75, 0x69, 0x64, 0x73, 0x12, 0x36, + 0x0a, 0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1a, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, + 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x08, 0x70, 0x6f, + 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x22, 0x28, 0x0a, 0x0e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, + 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 
0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x22, 0x36, 0x0a, 0x0c, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, + 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, + 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x75, 0x75, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x05, 0x75, 0x75, 0x69, 0x64, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x22, 0x65, 0x0a, 0x0d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x75, 0x75, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x05, 0x75, 0x75, 0x69, 0x64, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x68, + 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x68, 0x61, + 0x6c, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x71, 0x73, 0x18, 0x04, 0x20, 0x03, + 0x28, 0x04, 0x52, 0x04, 0x73, 0x65, 0x71, 0x73, 0x22, 0x4c, 0x0a, 0x0e, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x69, 0x73, + 0x63, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, 0x6d, 0x69, + 0x73, 0x63, 0x54, 0x69, 0x6d, 0x65, 0x22, 0x79, 0x0a, 0x13, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, + 0x75, 0x65, 0x72, 0x79, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x12, 0x14, 
0x0a, + 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x61, 0x69, 0x72, 0x65, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x08, 0x72, 0x65, 0x70, 0x61, 0x69, 0x72, 0x65, 0x64, 0x12, + 0x18, 0x0a, 0x07, 0x69, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x07, 0x69, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x61, 0x69, + 0x6c, 0x65, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x66, 0x61, 0x69, 0x6c, 0x65, + 0x64, 0x22, 0xd7, 0x01, 0x0a, 0x10, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x74, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x12, 0x2c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, + 0x73, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x3f, 0x0a, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, + 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, + 0x73, 0x74, 0x52, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, + 0x79, 0x12, 0x28, 0x0a, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x69, 0x6d, 0x65, 0x52, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x22, 0x9a, 0x02, 0x0a, 0x0e, + 0x43, 0x68, 0x65, 0x63, 0x6b, 
0x51, 0x75, 0x65, 0x72, 0x79, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, + 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, + 0x69, 0x64, 0x12, 0x2c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x50, 0x6f, + 0x6f, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x29, 0x0a, 0x05, 0x70, 0x68, 0x61, 0x73, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x13, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x63, 0x61, 0x6e, 0x50, + 0x68, 0x61, 0x73, 0x65, 0x52, 0x05, 0x70, 0x68, 0x61, 0x73, 0x65, 0x12, 0x3f, 0x0a, 0x0d, 0x69, + 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, + 0x75, 0x65, 0x72, 0x79, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x52, 0x0d, 0x69, + 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x12, 0x28, 0x0a, 0x04, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x69, 0x6d, 0x65, + 0x52, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x30, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, + 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x22, 0xd9, 0x02, 0x0a, 0x0e, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x1d, 0x0a, 0x0a, 0x72, + 0x65, 0x71, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x09, 0x72, 0x65, 0x71, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 
0x33, 0x0a, 0x0a, 0x69, 0x6e, + 0x73, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, + 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x73, 0x74, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x09, 0x69, 0x6e, 0x73, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x30, 0x0a, 0x09, 0x69, 0x6e, 0x73, 0x5f, 0x70, 0x68, 0x61, 0x73, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x13, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x63, + 0x61, 0x6e, 0x50, 0x68, 0x61, 0x73, 0x65, 0x52, 0x08, 0x69, 0x6e, 0x73, 0x50, 0x68, 0x61, 0x73, + 0x65, 0x12, 0x3f, 0x0a, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, + 0x63, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, + 0x69, 0x73, 0x74, 0x52, 0x0d, 0x69, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, + 0x63, 0x79, 0x12, 0x28, 0x0a, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x54, 0x69, 0x6d, 0x65, 0x52, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x05, + 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x50, 0x6f, 0x6f, + 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x2a, 0x0a, 0x07, 0x72, 0x65, 0x70, 0x6f, + 0x72, 0x74, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x63, 0x68, 0x6b, 0x2e, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x07, 0x72, 0x65, 0x70, + 0x6f, 0x72, 0x74, 0x73, 0x22, 0x73, 0x0a, 0x11, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x65, 0x74, + 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 
0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x66, + 0x6c, 0x61, 0x67, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x66, 0x6c, 0x61, 0x67, + 0x73, 0x12, 0x36, 0x0a, 0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, + 0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x75, 0x0a, 0x0d, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6c, 0x61, 0x67, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x05, 0x66, 0x6c, 0x61, 0x67, 0x73, 0x12, 0x36, 0x0a, 0x08, 0x70, 0x6f, + 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, + 0x73, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, + 0x65, 0x73, 0x22, 0x76, 0x0a, 0x11, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x47, 0x65, 0x74, 0x50, 0x6f, + 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x32, 0x0a, 0x07, 0x63, 0x6c, 0x61, + 0x73, 0x73, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x63, 0x68, 0x6b, + 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x43, + 0x6c, 0x61, 0x73, 0x73, 0x52, 0x07, 0x63, 
0x6c, 0x61, 0x73, 0x73, 0x65, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x73, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x73, 0x65, 0x64, 0x22, 0x7a, 0x0a, 0x12, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6c, 0x61, 0x67, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x66, 0x6c, 0x61, 0x67, 0x73, 0x12, 0x36, + 0x0a, 0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1a, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, + 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x08, 0x70, 0x6f, + 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x22, 0x77, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x41, + 0x63, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x65, 0x71, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x73, 0x65, 0x71, 0x12, 0x2b, 0x0a, 0x03, 0x61, 0x63, 0x74, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x52, 0x03, 0x61, 0x63, 0x74, 0x12, 0x17, 0x0a, 0x07, 0x66, 0x6f, 0x72, 0x5f, 0x61, 0x6c, + 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x66, 0x6f, 0x72, 0x41, 0x6c, 0x6c, 0x22, + 0x26, 0x0a, 0x0c, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x41, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 
0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, + 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, + 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, + 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_mgmt_check_proto_rawDescOnce sync.Once + file_mgmt_check_proto_rawDescData = file_mgmt_check_proto_rawDesc +) + +func file_mgmt_check_proto_rawDescGZIP() []byte { + file_mgmt_check_proto_rawDescOnce.Do(func() { + file_mgmt_check_proto_rawDescData = protoimpl.X.CompressGZIP(file_mgmt_check_proto_rawDescData) + }) + return file_mgmt_check_proto_rawDescData +} + +var file_mgmt_check_proto_msgTypes = make([]protoimpl.MessageInfo, 20) +var file_mgmt_check_proto_goTypes = []interface{}{ + (*CheckInconsistPolicy)(nil), // 0: mgmt.CheckInconsistPolicy + (*CheckEnableReq)(nil), // 1: mgmt.CheckEnableReq + (*CheckDisableReq)(nil), // 2: mgmt.CheckDisableReq + (*CheckStartReq)(nil), // 3: mgmt.CheckStartReq + (*CheckStartResp)(nil), // 4: mgmt.CheckStartResp + (*CheckStopReq)(nil), // 5: mgmt.CheckStopReq + (*CheckStopResp)(nil), // 6: mgmt.CheckStopResp + (*CheckQueryReq)(nil), // 7: mgmt.CheckQueryReq + (*CheckQueryTime)(nil), // 8: mgmt.CheckQueryTime + (*CheckQueryInconsist)(nil), // 9: mgmt.CheckQueryInconsist + (*CheckQueryTarget)(nil), // 10: mgmt.CheckQueryTarget + (*CheckQueryPool)(nil), // 11: mgmt.CheckQueryPool + (*CheckQueryResp)(nil), // 12: mgmt.CheckQueryResp + (*CheckSetPolicyReq)(nil), // 13: mgmt.CheckSetPolicyReq + (*CheckPropReq)(nil), // 14: mgmt.CheckPropReq + (*CheckPropResp)(nil), // 15: mgmt.CheckPropResp + (*CheckGetPolicyReq)(nil), // 16: mgmt.CheckGetPolicyReq + (*CheckGetPolicyResp)(nil), // 17: mgmt.CheckGetPolicyResp + (*CheckActReq)(nil), // 18: mgmt.CheckActReq + (*CheckActResp)(nil), // 19: mgmt.CheckActResp + (chk.CheckInconsistClass)(0), // 20: 
chk.CheckInconsistClass + (chk.CheckInconsistAction)(0), // 21: chk.CheckInconsistAction + (chk.CheckInstStatus)(0), // 22: chk.CheckInstStatus + (chk.CheckPoolStatus)(0), // 23: chk.CheckPoolStatus + (chk.CheckScanPhase)(0), // 24: chk.CheckScanPhase + (*chk.CheckReport)(nil), // 25: chk.CheckReport +} +var file_mgmt_check_proto_depIdxs = []int32{ + 20, // 0: mgmt.CheckInconsistPolicy.inconsist_cas:type_name -> chk.CheckInconsistClass + 21, // 1: mgmt.CheckInconsistPolicy.inconsist_act:type_name -> chk.CheckInconsistAction + 0, // 2: mgmt.CheckStartReq.policies:type_name -> mgmt.CheckInconsistPolicy + 22, // 3: mgmt.CheckQueryTarget.status:type_name -> chk.CheckInstStatus + 9, // 4: mgmt.CheckQueryTarget.inconsistency:type_name -> mgmt.CheckQueryInconsist + 8, // 5: mgmt.CheckQueryTarget.time:type_name -> mgmt.CheckQueryTime + 23, // 6: mgmt.CheckQueryPool.status:type_name -> chk.CheckPoolStatus + 24, // 7: mgmt.CheckQueryPool.phase:type_name -> chk.CheckScanPhase + 9, // 8: mgmt.CheckQueryPool.inconsistency:type_name -> mgmt.CheckQueryInconsist + 8, // 9: mgmt.CheckQueryPool.time:type_name -> mgmt.CheckQueryTime + 10, // 10: mgmt.CheckQueryPool.targets:type_name -> mgmt.CheckQueryTarget + 22, // 11: mgmt.CheckQueryResp.ins_status:type_name -> chk.CheckInstStatus + 24, // 12: mgmt.CheckQueryResp.ins_phase:type_name -> chk.CheckScanPhase + 9, // 13: mgmt.CheckQueryResp.inconsistency:type_name -> mgmt.CheckQueryInconsist + 8, // 14: mgmt.CheckQueryResp.time:type_name -> mgmt.CheckQueryTime + 11, // 15: mgmt.CheckQueryResp.pools:type_name -> mgmt.CheckQueryPool + 25, // 16: mgmt.CheckQueryResp.reports:type_name -> chk.CheckReport + 0, // 17: mgmt.CheckSetPolicyReq.policies:type_name -> mgmt.CheckInconsistPolicy + 0, // 18: mgmt.CheckPropResp.policies:type_name -> mgmt.CheckInconsistPolicy + 20, // 19: mgmt.CheckGetPolicyReq.classes:type_name -> chk.CheckInconsistClass + 0, // 20: mgmt.CheckGetPolicyResp.policies:type_name -> mgmt.CheckInconsistPolicy + 21, // 21: 
mgmt.CheckActReq.act:type_name -> chk.CheckInconsistAction + 22, // [22:22] is the sub-list for method output_type + 22, // [22:22] is the sub-list for method input_type + 22, // [22:22] is the sub-list for extension type_name + 22, // [22:22] is the sub-list for extension extendee + 0, // [0:22] is the sub-list for field type_name +} + +func init() { file_mgmt_check_proto_init() } +func file_mgmt_check_proto_init() { + if File_mgmt_check_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_mgmt_check_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckInconsistPolicy); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckEnableReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckDisableReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckStartReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckStartResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckStopReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return 
&v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckStopResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryTime); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryInconsist); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryTarget); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryPool); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckQueryResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckSetPolicyReq); i { + case 0: + return 
&v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckPropReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckPropResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckGetPolicyReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckGetPolicyResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckActReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_check_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckActResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_mgmt_check_proto_rawDesc, + NumEnums: 0, + NumMessages: 20, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_mgmt_check_proto_goTypes, + DependencyIndexes: file_mgmt_check_proto_depIdxs, + 
MessageInfos: file_mgmt_check_proto_msgTypes, + }.Build() + File_mgmt_check_proto = out.File + file_mgmt_check_proto_rawDesc = nil + file_mgmt_check_proto_goTypes = nil + file_mgmt_check_proto_depIdxs = nil +} diff --git a/src/control/common/proto/mgmt/mgmt.pb.go b/src/control/common/proto/mgmt/mgmt.pb.go index 820c1057709..9b0c52ed9a8 100644 --- a/src/control/common/proto/mgmt/mgmt.pb.go +++ b/src/control/common/proto/mgmt/mgmt.pb.go @@ -6,13 +6,14 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.30.0 // protoc v3.5.0 // source: mgmt/mgmt.proto package mgmt import ( + chk "github.com/daos-stack/daos/src/control/common/proto/chk" shared "github.com/daos-stack/daos/src/control/common/proto/shared" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" @@ -32,142 +33,189 @@ var file_mgmt_mgmt_proto_rawDesc = []byte{ 0x0a, 0x0f, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x1a, 0x12, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x0f, 0x6d, 0x67, 0x6d, - 0x74, 0x2f, 0x70, 0x6f, 0x6f, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x0f, 0x6d, 0x67, - 0x6d, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x0e, 0x6d, - 0x67, 0x6d, 0x74, 0x2f, 0x73, 0x76, 0x63, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x0e, 0x6d, - 0x67, 0x6d, 0x74, 0x2f, 0x61, 0x63, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x11, 0x6d, - 0x67, 0x6d, 0x74, 0x2f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x32, 0xe5, 0x0f, 0x0a, 0x07, 0x4d, 0x67, 0x6d, 0x74, 0x53, 0x76, 0x63, 0x12, 0x27, 0x0a, 0x04, - 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, 0x69, 0x6e, - 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 
0x4a, 0x6f, 0x69, 0x6e, 0x52, - 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x43, 0x0a, 0x0c, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x17, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x43, - 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x18, - 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x1a, - 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0a, 0x50, 0x6f, 0x6f, 0x6c, - 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x12, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, - 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, - 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, - 0x6f, 0x79, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, - 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, - 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x12, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x52, - 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, - 0x69, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, - 0x6c, 
0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x15, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, - 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 0x6f, 0x6f, 0x6c, 0x44, - 0x72, 0x61, 0x69, 0x6e, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, - 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, - 0x39, 0x0a, 0x0a, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x12, 0x13, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, - 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, - 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x48, 0x0a, 0x0f, 0x50, 0x6f, - 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x12, 0x18, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, - 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, - 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, - 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, - 0x79, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, - 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x48, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x12, - 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 
0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, - 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, - 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, - 0x72, 0x6f, 0x70, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, - 0x22, 0x00, 0x12, 0x2e, 0x0a, 0x0a, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x41, 0x43, 0x4c, - 0x12, 0x0f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x43, 0x4c, 0x52, 0x65, - 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x73, 0x70, - 0x22, 0x00, 0x12, 0x37, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x4f, 0x76, 0x65, 0x72, 0x77, 0x72, - 0x69, 0x74, 0x65, 0x41, 0x43, 0x4c, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4d, 0x6f, - 0x64, 0x69, 0x66, 0x79, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, - 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50, - 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x43, 0x4c, 0x12, 0x12, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x79, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x71, - 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, 
0x73, 0x70, 0x22, - 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x41, - 0x43, 0x4c, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, - 0x41, 0x43, 0x4c, 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, - 0x4c, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x41, 0x74, - 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, - 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x4c, - 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x61, - 0x69, 0x6e, 0x65, 0x72, 0x73, 0x12, 0x11, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, - 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3f, - 0x0a, 0x0c, 0x43, 0x6f, 0x6e, 0x74, 0x53, 0x65, 0x74, 0x4f, 0x77, 0x6e, 0x65, 0x72, 0x12, 0x15, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x53, 0x65, 0x74, 0x4f, 0x77, 0x6e, - 0x65, 0x72, 0x52, 0x65, 0x71, 0x1a, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x53, 0x65, 0x74, 0x4f, 0x77, 0x6e, 0x65, 0x72, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, - 0x3c, 0x0a, 0x0b, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, - 0x2e, 0x6d, 0x67, 
0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, - 0x79, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, - 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, - 0x0a, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x12, 0x13, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, - 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, - 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x53, 0x79, 0x73, 0x74, - 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, - 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x1a, - 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, - 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x71, 0x1a, - 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, - 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, - 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 
0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, - 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, - 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, - 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x12, 0x14, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, - 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, - 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0d, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x12, 0x16, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, - 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, - 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x1a, 0x17, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, - 0x74, 0x74, 0x72, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0d, 0x53, 0x79, 0x73, - 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, - 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, - 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, - 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x17, 
0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, - 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, - 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, - 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, - 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, - 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x2f, 0x70, 0x6f, 0x6f, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x10, 0x6d, 0x67, + 0x6d, 0x74, 0x2f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x0f, + 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, + 0x0e, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x73, 0x76, 0x63, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, + 0x0e, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x61, 0x63, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, + 0x11, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x1a, 0x0d, 0x63, 0x68, 0x6b, 0x2f, 0x63, 0x68, 0x6b, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x1a, 0x10, 0x63, 0x68, 0x6b, 0x2f, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x32, 0x98, 0x15, 0x0a, 0x07, 0x4d, 0x67, 0x6d, 0x74, 0x53, 0x76, 0x63, 0x12, + 0x27, 0x0a, 0x04, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, + 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, + 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x43, 0x0a, 0x0c, 0x43, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x17, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, + 0x64, 0x2e, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, + 0x71, 0x1a, 0x18, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 
0x64, 0x2e, 0x43, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, + 0x0b, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0a, 0x50, + 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x12, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x14, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, + 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, + 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, + 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, + 0x74, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, + 0x63, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, + 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, + 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x12, 0x14, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, + 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, + 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 
0x6f, + 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, + 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0a, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, + 0x12, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, + 0x6e, 0x64, 0x52, 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, + 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x48, 0x0a, + 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, + 0x12, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, + 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x19, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, 0x0a, 0x09, 0x50, 0x6f, 0x6f, 0x6c, 0x51, + 0x75, 0x65, 0x72, 0x79, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, + 0x48, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x12, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x19, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, + 0x6c, 0x53, 0x65, 0x74, 0x50, 
0x72, 0x6f, 0x70, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x15, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, 0x6c, 0x47, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, + 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x2e, 0x0a, 0x0a, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, + 0x41, 0x43, 0x4c, 0x12, 0x0f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x43, + 0x4c, 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, + 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x37, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x4f, 0x76, 0x65, + 0x72, 0x77, 0x72, 0x69, 0x74, 0x65, 0x41, 0x43, 0x4c, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x79, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x34, + 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x43, 0x4c, 0x12, + 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x79, 0x41, 0x43, 0x4c, + 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, + 0x73, 0x70, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x6c, 0x65, + 0x74, 0x65, 0x41, 0x43, 0x4c, 0x12, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x71, 0x1a, 0x0d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x41, 0x43, 0x4c, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 
0x42, 0x0a, 0x0d, 0x47, 0x65, + 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, + 0x52, 0x65, 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, + 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x36, + 0x0a, 0x09, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x12, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, + 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x12, 0x11, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x1a, 0x12, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, + 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x43, 0x6f, 0x6e, 0x74, 0x53, 0x65, 0x74, 0x4f, 0x77, 0x6e, 0x65, + 0x72, 0x12, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x53, 0x65, 0x74, + 0x4f, 0x77, 0x6e, 0x65, 0x72, 0x52, 0x65, 0x71, 0x1a, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x6f, 0x6e, 0x74, 0x53, 0x65, 0x74, 0x4f, 0x77, 0x6e, 0x65, 0x72, 0x52, 0x65, 0x73, 0x70, + 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, + 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, + 0x12, 0x39, 0x0a, 0x0a, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x12, 0x13, + 0x2e, 
0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, 0x70, + 0x52, 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, + 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, + 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, + 0x65, 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, + 0x0b, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x12, 0x14, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, + 0x65, 0x71, 0x1a, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x12, 0x16, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, + 0x70, 0x52, 0x65, 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, + 0x3b, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x45, 0x6e, + 0x61, 0x62, 0x6c, 0x65, 0x12, 0x14, 0x2e, 
0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3d, 0x0a, 0x12, + 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x44, 0x69, 0x73, 0x61, 0x62, + 0x6c, 0x65, 0x12, 0x15, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x44, + 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x10, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, + 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x72, + 0x74, 0x52, 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0f, + 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x6f, 0x70, 0x12, + 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x6f, 0x70, + 0x52, 0x65, 0x71, 0x1a, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x10, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x13, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x52, 0x65, 0x71, 0x1a, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x14, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x65, 0x74, 0x50, 0x6f, 0x6c, + 0x69, 0x63, 0x79, 0x12, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 
0x65, 0x63, 0x6b, + 0x53, 0x65, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x71, 0x1a, 0x0e, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x4b, + 0x0a, 0x14, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x47, 0x65, 0x74, + 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x71, 0x1a, + 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x47, 0x65, 0x74, 0x50, + 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x11, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x70, 0x61, 0x69, 0x72, + 0x12, 0x11, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x41, 0x63, 0x74, + 0x52, 0x65, 0x71, 0x1a, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x41, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0b, 0x50, 0x6f, 0x6f, + 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x12, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x1a, 0x15, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, + 0x65, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, + 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x22, 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, + 0x74, 0x74, 0x72, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x47, 0x65, 
0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, + 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, + 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, + 0x00, 0x12, 0x42, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x6f, 0x70, 0x12, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x1a, 0x17, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, + 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x37, 0x0a, 0x11, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x49, 0x6e, + 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x10, 0x2e, 0x63, 0x68, 0x6b, + 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x1a, 0x0e, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x34, + 0x0a, 0x14, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x49, 0x6e, 0x6a, 0x65, 0x63, 0x74, 0x50, 0x6f, 0x6f, + 0x6c, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x12, 0x0a, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x46, 0x61, 0x75, + 0x6c, 0x74, 0x1a, 0x0e, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x22, 0x00, 0x12, 0x38, 0x0a, 0x18, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x49, 0x6e, 0x6a, + 0x65, 0x63, 0x74, 0x4d, 0x67, 0x6d, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x61, 0x75, 0x6c, 0x74, + 0x12, 0x0a, 0x2e, 0x63, 0x68, 0x6b, 0x2e, 0x46, 0x61, 0x75, 0x6c, 0x74, 0x1a, 0x0e, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x44, 0x61, 0x6f, 0x73, 0x52, 
0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x3a, + 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, + 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, + 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, } var file_mgmt_mgmt_proto_goTypes = []interface{}{ @@ -198,40 +246,55 @@ var file_mgmt_mgmt_proto_goTypes = []interface{}{ (*SystemExcludeReq)(nil), // 24: mgmt.SystemExcludeReq (*SystemEraseReq)(nil), // 25: mgmt.SystemEraseReq (*SystemCleanupReq)(nil), // 26: mgmt.SystemCleanupReq - (*PoolUpgradeReq)(nil), // 27: mgmt.PoolUpgradeReq - (*SystemSetAttrReq)(nil), // 28: mgmt.SystemSetAttrReq - (*SystemGetAttrReq)(nil), // 29: mgmt.SystemGetAttrReq - (*SystemSetPropReq)(nil), // 30: mgmt.SystemSetPropReq - (*SystemGetPropReq)(nil), // 31: mgmt.SystemGetPropReq - (*JoinResp)(nil), // 32: mgmt.JoinResp - (*shared.ClusterEventResp)(nil), // 33: shared.ClusterEventResp - (*LeaderQueryResp)(nil), // 34: mgmt.LeaderQueryResp - (*PoolCreateResp)(nil), // 35: mgmt.PoolCreateResp - (*PoolDestroyResp)(nil), // 36: mgmt.PoolDestroyResp - (*PoolEvictResp)(nil), // 37: mgmt.PoolEvictResp - (*PoolExcludeResp)(nil), // 38: mgmt.PoolExcludeResp - (*PoolDrainResp)(nil), // 39: mgmt.PoolDrainResp - (*PoolExtendResp)(nil), // 40: mgmt.PoolExtendResp - (*PoolReintegrateResp)(nil), // 41: mgmt.PoolReintegrateResp - (*PoolQueryResp)(nil), // 42: mgmt.PoolQueryResp - (*PoolQueryTargetResp)(nil), // 43: mgmt.PoolQueryTargetResp - (*PoolSetPropResp)(nil), // 44: mgmt.PoolSetPropResp - (*PoolGetPropResp)(nil), // 45: mgmt.PoolGetPropResp - (*ACLResp)(nil), // 46: mgmt.ACLResp - (*GetAttachInfoResp)(nil), // 47: mgmt.GetAttachInfoResp - (*ListPoolsResp)(nil), // 48: mgmt.ListPoolsResp - (*ListContResp)(nil), // 49: mgmt.ListContResp - 
(*ContSetOwnerResp)(nil), // 50: mgmt.ContSetOwnerResp - (*SystemQueryResp)(nil), // 51: mgmt.SystemQueryResp - (*SystemStopResp)(nil), // 52: mgmt.SystemStopResp - (*SystemStartResp)(nil), // 53: mgmt.SystemStartResp - (*SystemExcludeResp)(nil), // 54: mgmt.SystemExcludeResp - (*SystemEraseResp)(nil), // 55: mgmt.SystemEraseResp - (*SystemCleanupResp)(nil), // 56: mgmt.SystemCleanupResp - (*PoolUpgradeResp)(nil), // 57: mgmt.PoolUpgradeResp - (*DaosResp)(nil), // 58: mgmt.DaosResp - (*SystemGetAttrResp)(nil), // 59: mgmt.SystemGetAttrResp - (*SystemGetPropResp)(nil), // 60: mgmt.SystemGetPropResp + (*CheckEnableReq)(nil), // 27: mgmt.CheckEnableReq + (*CheckDisableReq)(nil), // 28: mgmt.CheckDisableReq + (*CheckStartReq)(nil), // 29: mgmt.CheckStartReq + (*CheckStopReq)(nil), // 30: mgmt.CheckStopReq + (*CheckQueryReq)(nil), // 31: mgmt.CheckQueryReq + (*CheckSetPolicyReq)(nil), // 32: mgmt.CheckSetPolicyReq + (*CheckGetPolicyReq)(nil), // 33: mgmt.CheckGetPolicyReq + (*CheckActReq)(nil), // 34: mgmt.CheckActReq + (*PoolUpgradeReq)(nil), // 35: mgmt.PoolUpgradeReq + (*SystemSetAttrReq)(nil), // 36: mgmt.SystemSetAttrReq + (*SystemGetAttrReq)(nil), // 37: mgmt.SystemGetAttrReq + (*SystemSetPropReq)(nil), // 38: mgmt.SystemSetPropReq + (*SystemGetPropReq)(nil), // 39: mgmt.SystemGetPropReq + (*chk.CheckReport)(nil), // 40: chk.CheckReport + (*chk.Fault)(nil), // 41: chk.Fault + (*JoinResp)(nil), // 42: mgmt.JoinResp + (*shared.ClusterEventResp)(nil), // 43: shared.ClusterEventResp + (*LeaderQueryResp)(nil), // 44: mgmt.LeaderQueryResp + (*PoolCreateResp)(nil), // 45: mgmt.PoolCreateResp + (*PoolDestroyResp)(nil), // 46: mgmt.PoolDestroyResp + (*PoolEvictResp)(nil), // 47: mgmt.PoolEvictResp + (*PoolExcludeResp)(nil), // 48: mgmt.PoolExcludeResp + (*PoolDrainResp)(nil), // 49: mgmt.PoolDrainResp + (*PoolExtendResp)(nil), // 50: mgmt.PoolExtendResp + (*PoolReintegrateResp)(nil), // 51: mgmt.PoolReintegrateResp + (*PoolQueryResp)(nil), // 52: mgmt.PoolQueryResp + 
(*PoolQueryTargetResp)(nil), // 53: mgmt.PoolQueryTargetResp + (*PoolSetPropResp)(nil), // 54: mgmt.PoolSetPropResp + (*PoolGetPropResp)(nil), // 55: mgmt.PoolGetPropResp + (*ACLResp)(nil), // 56: mgmt.ACLResp + (*GetAttachInfoResp)(nil), // 57: mgmt.GetAttachInfoResp + (*ListPoolsResp)(nil), // 58: mgmt.ListPoolsResp + (*ListContResp)(nil), // 59: mgmt.ListContResp + (*ContSetOwnerResp)(nil), // 60: mgmt.ContSetOwnerResp + (*SystemQueryResp)(nil), // 61: mgmt.SystemQueryResp + (*SystemStopResp)(nil), // 62: mgmt.SystemStopResp + (*SystemStartResp)(nil), // 63: mgmt.SystemStartResp + (*SystemExcludeResp)(nil), // 64: mgmt.SystemExcludeResp + (*SystemEraseResp)(nil), // 65: mgmt.SystemEraseResp + (*SystemCleanupResp)(nil), // 66: mgmt.SystemCleanupResp + (*DaosResp)(nil), // 67: mgmt.DaosResp + (*CheckStartResp)(nil), // 68: mgmt.CheckStartResp + (*CheckStopResp)(nil), // 69: mgmt.CheckStopResp + (*CheckQueryResp)(nil), // 70: mgmt.CheckQueryResp + (*CheckGetPolicyResp)(nil), // 71: mgmt.CheckGetPolicyResp + (*CheckActResp)(nil), // 72: mgmt.CheckActResp + (*PoolUpgradeResp)(nil), // 73: mgmt.PoolUpgradeResp + (*SystemGetAttrResp)(nil), // 74: mgmt.SystemGetAttrResp + (*SystemGetPropResp)(nil), // 75: mgmt.SystemGetPropResp } var file_mgmt_mgmt_proto_depIdxs = []int32{ 0, // 0: mgmt.MgmtSvc.Join:input_type -> mgmt.JoinReq @@ -262,46 +325,68 @@ var file_mgmt_mgmt_proto_depIdxs = []int32{ 24, // 25: mgmt.MgmtSvc.SystemExclude:input_type -> mgmt.SystemExcludeReq 25, // 26: mgmt.MgmtSvc.SystemErase:input_type -> mgmt.SystemEraseReq 26, // 27: mgmt.MgmtSvc.SystemCleanup:input_type -> mgmt.SystemCleanupReq - 27, // 28: mgmt.MgmtSvc.PoolUpgrade:input_type -> mgmt.PoolUpgradeReq - 28, // 29: mgmt.MgmtSvc.SystemSetAttr:input_type -> mgmt.SystemSetAttrReq - 29, // 30: mgmt.MgmtSvc.SystemGetAttr:input_type -> mgmt.SystemGetAttrReq - 30, // 31: mgmt.MgmtSvc.SystemSetProp:input_type -> mgmt.SystemSetPropReq - 31, // 32: mgmt.MgmtSvc.SystemGetProp:input_type -> 
mgmt.SystemGetPropReq - 32, // 33: mgmt.MgmtSvc.Join:output_type -> mgmt.JoinResp - 33, // 34: mgmt.MgmtSvc.ClusterEvent:output_type -> shared.ClusterEventResp - 34, // 35: mgmt.MgmtSvc.LeaderQuery:output_type -> mgmt.LeaderQueryResp - 35, // 36: mgmt.MgmtSvc.PoolCreate:output_type -> mgmt.PoolCreateResp - 36, // 37: mgmt.MgmtSvc.PoolDestroy:output_type -> mgmt.PoolDestroyResp - 37, // 38: mgmt.MgmtSvc.PoolEvict:output_type -> mgmt.PoolEvictResp - 38, // 39: mgmt.MgmtSvc.PoolExclude:output_type -> mgmt.PoolExcludeResp - 39, // 40: mgmt.MgmtSvc.PoolDrain:output_type -> mgmt.PoolDrainResp - 40, // 41: mgmt.MgmtSvc.PoolExtend:output_type -> mgmt.PoolExtendResp - 41, // 42: mgmt.MgmtSvc.PoolReintegrate:output_type -> mgmt.PoolReintegrateResp - 42, // 43: mgmt.MgmtSvc.PoolQuery:output_type -> mgmt.PoolQueryResp - 43, // 44: mgmt.MgmtSvc.PoolQueryTarget:output_type -> mgmt.PoolQueryTargetResp - 44, // 45: mgmt.MgmtSvc.PoolSetProp:output_type -> mgmt.PoolSetPropResp - 45, // 46: mgmt.MgmtSvc.PoolGetProp:output_type -> mgmt.PoolGetPropResp - 46, // 47: mgmt.MgmtSvc.PoolGetACL:output_type -> mgmt.ACLResp - 46, // 48: mgmt.MgmtSvc.PoolOverwriteACL:output_type -> mgmt.ACLResp - 46, // 49: mgmt.MgmtSvc.PoolUpdateACL:output_type -> mgmt.ACLResp - 46, // 50: mgmt.MgmtSvc.PoolDeleteACL:output_type -> mgmt.ACLResp - 47, // 51: mgmt.MgmtSvc.GetAttachInfo:output_type -> mgmt.GetAttachInfoResp - 48, // 52: mgmt.MgmtSvc.ListPools:output_type -> mgmt.ListPoolsResp - 49, // 53: mgmt.MgmtSvc.ListContainers:output_type -> mgmt.ListContResp - 50, // 54: mgmt.MgmtSvc.ContSetOwner:output_type -> mgmt.ContSetOwnerResp - 51, // 55: mgmt.MgmtSvc.SystemQuery:output_type -> mgmt.SystemQueryResp - 52, // 56: mgmt.MgmtSvc.SystemStop:output_type -> mgmt.SystemStopResp - 53, // 57: mgmt.MgmtSvc.SystemStart:output_type -> mgmt.SystemStartResp - 54, // 58: mgmt.MgmtSvc.SystemExclude:output_type -> mgmt.SystemExcludeResp - 55, // 59: mgmt.MgmtSvc.SystemErase:output_type -> mgmt.SystemEraseResp - 56, // 
60: mgmt.MgmtSvc.SystemCleanup:output_type -> mgmt.SystemCleanupResp - 57, // 61: mgmt.MgmtSvc.PoolUpgrade:output_type -> mgmt.PoolUpgradeResp - 58, // 62: mgmt.MgmtSvc.SystemSetAttr:output_type -> mgmt.DaosResp - 59, // 63: mgmt.MgmtSvc.SystemGetAttr:output_type -> mgmt.SystemGetAttrResp - 58, // 64: mgmt.MgmtSvc.SystemSetProp:output_type -> mgmt.DaosResp - 60, // 65: mgmt.MgmtSvc.SystemGetProp:output_type -> mgmt.SystemGetPropResp - 33, // [33:66] is the sub-list for method output_type - 0, // [0:33] is the sub-list for method input_type + 27, // 28: mgmt.MgmtSvc.SystemCheckEnable:input_type -> mgmt.CheckEnableReq + 28, // 29: mgmt.MgmtSvc.SystemCheckDisable:input_type -> mgmt.CheckDisableReq + 29, // 30: mgmt.MgmtSvc.SystemCheckStart:input_type -> mgmt.CheckStartReq + 30, // 31: mgmt.MgmtSvc.SystemCheckStop:input_type -> mgmt.CheckStopReq + 31, // 32: mgmt.MgmtSvc.SystemCheckQuery:input_type -> mgmt.CheckQueryReq + 32, // 33: mgmt.MgmtSvc.SystemCheckSetPolicy:input_type -> mgmt.CheckSetPolicyReq + 33, // 34: mgmt.MgmtSvc.SystemCheckGetPolicy:input_type -> mgmt.CheckGetPolicyReq + 34, // 35: mgmt.MgmtSvc.SystemCheckRepair:input_type -> mgmt.CheckActReq + 35, // 36: mgmt.MgmtSvc.PoolUpgrade:input_type -> mgmt.PoolUpgradeReq + 36, // 37: mgmt.MgmtSvc.SystemSetAttr:input_type -> mgmt.SystemSetAttrReq + 37, // 38: mgmt.MgmtSvc.SystemGetAttr:input_type -> mgmt.SystemGetAttrReq + 38, // 39: mgmt.MgmtSvc.SystemSetProp:input_type -> mgmt.SystemSetPropReq + 39, // 40: mgmt.MgmtSvc.SystemGetProp:input_type -> mgmt.SystemGetPropReq + 40, // 41: mgmt.MgmtSvc.FaultInjectReport:input_type -> chk.CheckReport + 41, // 42: mgmt.MgmtSvc.FaultInjectPoolFault:input_type -> chk.Fault + 41, // 43: mgmt.MgmtSvc.FaultInjectMgmtPoolFault:input_type -> chk.Fault + 42, // 44: mgmt.MgmtSvc.Join:output_type -> mgmt.JoinResp + 43, // 45: mgmt.MgmtSvc.ClusterEvent:output_type -> shared.ClusterEventResp + 44, // 46: mgmt.MgmtSvc.LeaderQuery:output_type -> mgmt.LeaderQueryResp + 45, // 47: 
mgmt.MgmtSvc.PoolCreate:output_type -> mgmt.PoolCreateResp + 46, // 48: mgmt.MgmtSvc.PoolDestroy:output_type -> mgmt.PoolDestroyResp + 47, // 49: mgmt.MgmtSvc.PoolEvict:output_type -> mgmt.PoolEvictResp + 48, // 50: mgmt.MgmtSvc.PoolExclude:output_type -> mgmt.PoolExcludeResp + 49, // 51: mgmt.MgmtSvc.PoolDrain:output_type -> mgmt.PoolDrainResp + 50, // 52: mgmt.MgmtSvc.PoolExtend:output_type -> mgmt.PoolExtendResp + 51, // 53: mgmt.MgmtSvc.PoolReintegrate:output_type -> mgmt.PoolReintegrateResp + 52, // 54: mgmt.MgmtSvc.PoolQuery:output_type -> mgmt.PoolQueryResp + 53, // 55: mgmt.MgmtSvc.PoolQueryTarget:output_type -> mgmt.PoolQueryTargetResp + 54, // 56: mgmt.MgmtSvc.PoolSetProp:output_type -> mgmt.PoolSetPropResp + 55, // 57: mgmt.MgmtSvc.PoolGetProp:output_type -> mgmt.PoolGetPropResp + 56, // 58: mgmt.MgmtSvc.PoolGetACL:output_type -> mgmt.ACLResp + 56, // 59: mgmt.MgmtSvc.PoolOverwriteACL:output_type -> mgmt.ACLResp + 56, // 60: mgmt.MgmtSvc.PoolUpdateACL:output_type -> mgmt.ACLResp + 56, // 61: mgmt.MgmtSvc.PoolDeleteACL:output_type -> mgmt.ACLResp + 57, // 62: mgmt.MgmtSvc.GetAttachInfo:output_type -> mgmt.GetAttachInfoResp + 58, // 63: mgmt.MgmtSvc.ListPools:output_type -> mgmt.ListPoolsResp + 59, // 64: mgmt.MgmtSvc.ListContainers:output_type -> mgmt.ListContResp + 60, // 65: mgmt.MgmtSvc.ContSetOwner:output_type -> mgmt.ContSetOwnerResp + 61, // 66: mgmt.MgmtSvc.SystemQuery:output_type -> mgmt.SystemQueryResp + 62, // 67: mgmt.MgmtSvc.SystemStop:output_type -> mgmt.SystemStopResp + 63, // 68: mgmt.MgmtSvc.SystemStart:output_type -> mgmt.SystemStartResp + 64, // 69: mgmt.MgmtSvc.SystemExclude:output_type -> mgmt.SystemExcludeResp + 65, // 70: mgmt.MgmtSvc.SystemErase:output_type -> mgmt.SystemEraseResp + 66, // 71: mgmt.MgmtSvc.SystemCleanup:output_type -> mgmt.SystemCleanupResp + 67, // 72: mgmt.MgmtSvc.SystemCheckEnable:output_type -> mgmt.DaosResp + 67, // 73: mgmt.MgmtSvc.SystemCheckDisable:output_type -> mgmt.DaosResp + 68, // 74: 
mgmt.MgmtSvc.SystemCheckStart:output_type -> mgmt.CheckStartResp + 69, // 75: mgmt.MgmtSvc.SystemCheckStop:output_type -> mgmt.CheckStopResp + 70, // 76: mgmt.MgmtSvc.SystemCheckQuery:output_type -> mgmt.CheckQueryResp + 67, // 77: mgmt.MgmtSvc.SystemCheckSetPolicy:output_type -> mgmt.DaosResp + 71, // 78: mgmt.MgmtSvc.SystemCheckGetPolicy:output_type -> mgmt.CheckGetPolicyResp + 72, // 79: mgmt.MgmtSvc.SystemCheckRepair:output_type -> mgmt.CheckActResp + 73, // 80: mgmt.MgmtSvc.PoolUpgrade:output_type -> mgmt.PoolUpgradeResp + 67, // 81: mgmt.MgmtSvc.SystemSetAttr:output_type -> mgmt.DaosResp + 74, // 82: mgmt.MgmtSvc.SystemGetAttr:output_type -> mgmt.SystemGetAttrResp + 67, // 83: mgmt.MgmtSvc.SystemSetProp:output_type -> mgmt.DaosResp + 75, // 84: mgmt.MgmtSvc.SystemGetProp:output_type -> mgmt.SystemGetPropResp + 67, // 85: mgmt.MgmtSvc.FaultInjectReport:output_type -> mgmt.DaosResp + 67, // 86: mgmt.MgmtSvc.FaultInjectPoolFault:output_type -> mgmt.DaosResp + 67, // 87: mgmt.MgmtSvc.FaultInjectMgmtPoolFault:output_type -> mgmt.DaosResp + 44, // [44:88] is the sub-list for method output_type + 0, // [0:44] is the sub-list for method input_type 0, // [0:0] is the sub-list for extension type_name 0, // [0:0] is the sub-list for extension extendee 0, // [0:0] is the sub-list for field type_name @@ -313,6 +398,7 @@ func file_mgmt_mgmt_proto_init() { return } file_mgmt_pool_proto_init() + file_mgmt_check_proto_init() file_mgmt_cont_proto_init() file_mgmt_svc_proto_init() file_mgmt_acl_proto_init() diff --git a/src/control/common/proto/mgmt/mgmt_grpc.pb.go b/src/control/common/proto/mgmt/mgmt_grpc.pb.go index 99ef1e8e95b..1c976e82251 100644 --- a/src/control/common/proto/mgmt/mgmt_grpc.pb.go +++ b/src/control/common/proto/mgmt/mgmt_grpc.pb.go @@ -1,6 +1,12 @@ +// +// (C) Copyright 2019-2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: -// - protoc-gen-go-grpc v1.2.0 +// - protoc-gen-go-grpc v1.3.0 // - protoc v3.5.0 // source: mgmt/mgmt.proto @@ -8,6 +14,7 @@ package mgmt import ( context "context" + chk "github.com/daos-stack/daos/src/control/common/proto/chk" shared "github.com/daos-stack/daos/src/control/common/proto/shared" grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" @@ -19,6 +26,53 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 +const ( + MgmtSvc_Join_FullMethodName = "/mgmt.MgmtSvc/Join" + MgmtSvc_ClusterEvent_FullMethodName = "/mgmt.MgmtSvc/ClusterEvent" + MgmtSvc_LeaderQuery_FullMethodName = "/mgmt.MgmtSvc/LeaderQuery" + MgmtSvc_PoolCreate_FullMethodName = "/mgmt.MgmtSvc/PoolCreate" + MgmtSvc_PoolDestroy_FullMethodName = "/mgmt.MgmtSvc/PoolDestroy" + MgmtSvc_PoolEvict_FullMethodName = "/mgmt.MgmtSvc/PoolEvict" + MgmtSvc_PoolExclude_FullMethodName = "/mgmt.MgmtSvc/PoolExclude" + MgmtSvc_PoolDrain_FullMethodName = "/mgmt.MgmtSvc/PoolDrain" + MgmtSvc_PoolExtend_FullMethodName = "/mgmt.MgmtSvc/PoolExtend" + MgmtSvc_PoolReintegrate_FullMethodName = "/mgmt.MgmtSvc/PoolReintegrate" + MgmtSvc_PoolQuery_FullMethodName = "/mgmt.MgmtSvc/PoolQuery" + MgmtSvc_PoolQueryTarget_FullMethodName = "/mgmt.MgmtSvc/PoolQueryTarget" + MgmtSvc_PoolSetProp_FullMethodName = "/mgmt.MgmtSvc/PoolSetProp" + MgmtSvc_PoolGetProp_FullMethodName = "/mgmt.MgmtSvc/PoolGetProp" + MgmtSvc_PoolGetACL_FullMethodName = "/mgmt.MgmtSvc/PoolGetACL" + MgmtSvc_PoolOverwriteACL_FullMethodName = "/mgmt.MgmtSvc/PoolOverwriteACL" + MgmtSvc_PoolUpdateACL_FullMethodName = "/mgmt.MgmtSvc/PoolUpdateACL" + MgmtSvc_PoolDeleteACL_FullMethodName = "/mgmt.MgmtSvc/PoolDeleteACL" + MgmtSvc_GetAttachInfo_FullMethodName = "/mgmt.MgmtSvc/GetAttachInfo" + MgmtSvc_ListPools_FullMethodName = "/mgmt.MgmtSvc/ListPools" + MgmtSvc_ListContainers_FullMethodName = "/mgmt.MgmtSvc/ListContainers" + MgmtSvc_ContSetOwner_FullMethodName = "/mgmt.MgmtSvc/ContSetOwner" + 
MgmtSvc_SystemQuery_FullMethodName = "/mgmt.MgmtSvc/SystemQuery" + MgmtSvc_SystemStop_FullMethodName = "/mgmt.MgmtSvc/SystemStop" + MgmtSvc_SystemStart_FullMethodName = "/mgmt.MgmtSvc/SystemStart" + MgmtSvc_SystemExclude_FullMethodName = "/mgmt.MgmtSvc/SystemExclude" + MgmtSvc_SystemErase_FullMethodName = "/mgmt.MgmtSvc/SystemErase" + MgmtSvc_SystemCleanup_FullMethodName = "/mgmt.MgmtSvc/SystemCleanup" + MgmtSvc_SystemCheckEnable_FullMethodName = "/mgmt.MgmtSvc/SystemCheckEnable" + MgmtSvc_SystemCheckDisable_FullMethodName = "/mgmt.MgmtSvc/SystemCheckDisable" + MgmtSvc_SystemCheckStart_FullMethodName = "/mgmt.MgmtSvc/SystemCheckStart" + MgmtSvc_SystemCheckStop_FullMethodName = "/mgmt.MgmtSvc/SystemCheckStop" + MgmtSvc_SystemCheckQuery_FullMethodName = "/mgmt.MgmtSvc/SystemCheckQuery" + MgmtSvc_SystemCheckSetPolicy_FullMethodName = "/mgmt.MgmtSvc/SystemCheckSetPolicy" + MgmtSvc_SystemCheckGetPolicy_FullMethodName = "/mgmt.MgmtSvc/SystemCheckGetPolicy" + MgmtSvc_SystemCheckRepair_FullMethodName = "/mgmt.MgmtSvc/SystemCheckRepair" + MgmtSvc_PoolUpgrade_FullMethodName = "/mgmt.MgmtSvc/PoolUpgrade" + MgmtSvc_SystemSetAttr_FullMethodName = "/mgmt.MgmtSvc/SystemSetAttr" + MgmtSvc_SystemGetAttr_FullMethodName = "/mgmt.MgmtSvc/SystemGetAttr" + MgmtSvc_SystemSetProp_FullMethodName = "/mgmt.MgmtSvc/SystemSetProp" + MgmtSvc_SystemGetProp_FullMethodName = "/mgmt.MgmtSvc/SystemGetProp" + MgmtSvc_FaultInjectReport_FullMethodName = "/mgmt.MgmtSvc/FaultInjectReport" + MgmtSvc_FaultInjectPoolFault_FullMethodName = "/mgmt.MgmtSvc/FaultInjectPoolFault" + MgmtSvc_FaultInjectMgmtPoolFault_FullMethodName = "/mgmt.MgmtSvc/FaultInjectMgmtPoolFault" +) + // MgmtSvcClient is the client API for MgmtSvc service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
@@ -80,6 +134,22 @@ type MgmtSvcClient interface { SystemErase(ctx context.Context, in *SystemEraseReq, opts ...grpc.CallOption) (*SystemEraseResp, error) // Clean up leaked resources for a given node SystemCleanup(ctx context.Context, in *SystemCleanupReq, opts ...grpc.CallOption) (*SystemCleanupResp, error) + // Enable system check mode + SystemCheckEnable(ctx context.Context, in *CheckEnableReq, opts ...grpc.CallOption) (*DaosResp, error) + // Disable system check mode + SystemCheckDisable(ctx context.Context, in *CheckDisableReq, opts ...grpc.CallOption) (*DaosResp, error) + // Initiate a system check + SystemCheckStart(ctx context.Context, in *CheckStartReq, opts ...grpc.CallOption) (*CheckStartResp, error) + // Stop a system check + SystemCheckStop(ctx context.Context, in *CheckStopReq, opts ...grpc.CallOption) (*CheckStopResp, error) + // Query a system check + SystemCheckQuery(ctx context.Context, in *CheckQueryReq, opts ...grpc.CallOption) (*CheckQueryResp, error) + // Set system check properties + SystemCheckSetPolicy(ctx context.Context, in *CheckSetPolicyReq, opts ...grpc.CallOption) (*DaosResp, error) + // Query system check properties + SystemCheckGetPolicy(ctx context.Context, in *CheckGetPolicyReq, opts ...grpc.CallOption) (*CheckGetPolicyResp, error) + // Send the desired action to repair an inconsistency. + SystemCheckRepair(ctx context.Context, in *CheckActReq, opts ...grpc.CallOption) (*CheckActResp, error) // PoolUpgrade queries a DAOS pool. PoolUpgrade(ctx context.Context, in *PoolUpgradeReq, opts ...grpc.CallOption) (*PoolUpgradeResp, error) // Set a system attribute or attributes. @@ -90,6 +160,12 @@ type MgmtSvcClient interface { SystemSetProp(ctx context.Context, in *SystemSetPropReq, opts ...grpc.CallOption) (*DaosResp, error) // Get a system property or properties. 
SystemGetProp(ctx context.Context, in *SystemGetPropReq, opts ...grpc.CallOption) (*SystemGetPropResp, error) + // Fault injection handlers are only implemented in non-release builds. + // FaultInjectReport injects a checker report. + FaultInjectReport(ctx context.Context, in *chk.CheckReport, opts ...grpc.CallOption) (*DaosResp, error) + // FaultInjectPoolFault creates a pool fault for testing the checker. + FaultInjectPoolFault(ctx context.Context, in *chk.Fault, opts ...grpc.CallOption) (*DaosResp, error) + FaultInjectMgmtPoolFault(ctx context.Context, in *chk.Fault, opts ...grpc.CallOption) (*DaosResp, error) } type mgmtSvcClient struct { @@ -102,7 +178,7 @@ func NewMgmtSvcClient(cc grpc.ClientConnInterface) MgmtSvcClient { func (c *mgmtSvcClient) Join(ctx context.Context, in *JoinReq, opts ...grpc.CallOption) (*JoinResp, error) { out := new(JoinResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/Join", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_Join_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -111,7 +187,7 @@ func (c *mgmtSvcClient) Join(ctx context.Context, in *JoinReq, opts ...grpc.Call func (c *mgmtSvcClient) ClusterEvent(ctx context.Context, in *shared.ClusterEventReq, opts ...grpc.CallOption) (*shared.ClusterEventResp, error) { out := new(shared.ClusterEventResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/ClusterEvent", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_ClusterEvent_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -120,7 +196,7 @@ func (c *mgmtSvcClient) ClusterEvent(ctx context.Context, in *shared.ClusterEven func (c *mgmtSvcClient) LeaderQuery(ctx context.Context, in *LeaderQueryReq, opts ...grpc.CallOption) (*LeaderQueryResp, error) { out := new(LeaderQueryResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/LeaderQuery", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_LeaderQuery_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -129,7 +205,7 @@ func (c *mgmtSvcClient) LeaderQuery(ctx context.Context, in *LeaderQueryReq, opt func (c *mgmtSvcClient) PoolCreate(ctx context.Context, in *PoolCreateReq, opts ...grpc.CallOption) (*PoolCreateResp, error) { out := new(PoolCreateResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolCreate", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolCreate_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -138,7 +214,7 @@ func (c *mgmtSvcClient) PoolCreate(ctx context.Context, in *PoolCreateReq, opts func (c *mgmtSvcClient) PoolDestroy(ctx context.Context, in *PoolDestroyReq, opts ...grpc.CallOption) (*PoolDestroyResp, error) { out := new(PoolDestroyResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolDestroy", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolDestroy_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -147,7 +223,7 @@ func (c *mgmtSvcClient) PoolDestroy(ctx context.Context, in *PoolDestroyReq, opt func (c *mgmtSvcClient) PoolEvict(ctx context.Context, in *PoolEvictReq, opts ...grpc.CallOption) (*PoolEvictResp, error) { out := new(PoolEvictResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolEvict", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolEvict_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -156,7 +232,7 @@ func (c *mgmtSvcClient) PoolEvict(ctx context.Context, in *PoolEvictReq, opts .. func (c *mgmtSvcClient) PoolExclude(ctx context.Context, in *PoolExcludeReq, opts ...grpc.CallOption) (*PoolExcludeResp, error) { out := new(PoolExcludeResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolExclude", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolExclude_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -165,7 +241,7 @@ func (c *mgmtSvcClient) PoolExclude(ctx context.Context, in *PoolExcludeReq, opt func (c *mgmtSvcClient) PoolDrain(ctx context.Context, in *PoolDrainReq, opts ...grpc.CallOption) (*PoolDrainResp, error) { out := new(PoolDrainResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolDrain", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolDrain_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -174,7 +250,7 @@ func (c *mgmtSvcClient) PoolDrain(ctx context.Context, in *PoolDrainReq, opts .. func (c *mgmtSvcClient) PoolExtend(ctx context.Context, in *PoolExtendReq, opts ...grpc.CallOption) (*PoolExtendResp, error) { out := new(PoolExtendResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolExtend", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolExtend_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -183,7 +259,7 @@ func (c *mgmtSvcClient) PoolExtend(ctx context.Context, in *PoolExtendReq, opts func (c *mgmtSvcClient) PoolReintegrate(ctx context.Context, in *PoolReintegrateReq, opts ...grpc.CallOption) (*PoolReintegrateResp, error) { out := new(PoolReintegrateResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolReintegrate", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolReintegrate_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -192,7 +268,7 @@ func (c *mgmtSvcClient) PoolReintegrate(ctx context.Context, in *PoolReintegrate func (c *mgmtSvcClient) PoolQuery(ctx context.Context, in *PoolQueryReq, opts ...grpc.CallOption) (*PoolQueryResp, error) { out := new(PoolQueryResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolQuery", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolQuery_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -201,7 +277,7 @@ func (c *mgmtSvcClient) PoolQuery(ctx context.Context, in *PoolQueryReq, opts .. 
func (c *mgmtSvcClient) PoolQueryTarget(ctx context.Context, in *PoolQueryTargetReq, opts ...grpc.CallOption) (*PoolQueryTargetResp, error) { out := new(PoolQueryTargetResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolQueryTarget", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolQueryTarget_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -210,7 +286,7 @@ func (c *mgmtSvcClient) PoolQueryTarget(ctx context.Context, in *PoolQueryTarget func (c *mgmtSvcClient) PoolSetProp(ctx context.Context, in *PoolSetPropReq, opts ...grpc.CallOption) (*PoolSetPropResp, error) { out := new(PoolSetPropResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolSetProp", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolSetProp_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -219,7 +295,7 @@ func (c *mgmtSvcClient) PoolSetProp(ctx context.Context, in *PoolSetPropReq, opt func (c *mgmtSvcClient) PoolGetProp(ctx context.Context, in *PoolGetPropReq, opts ...grpc.CallOption) (*PoolGetPropResp, error) { out := new(PoolGetPropResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolGetProp", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolGetProp_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -228,7 +304,7 @@ func (c *mgmtSvcClient) PoolGetProp(ctx context.Context, in *PoolGetPropReq, opt func (c *mgmtSvcClient) PoolGetACL(ctx context.Context, in *GetACLReq, opts ...grpc.CallOption) (*ACLResp, error) { out := new(ACLResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolGetACL", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolGetACL_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -237,7 +313,7 @@ func (c *mgmtSvcClient) PoolGetACL(ctx context.Context, in *GetACLReq, opts ...g func (c *mgmtSvcClient) PoolOverwriteACL(ctx context.Context, in *ModifyACLReq, opts ...grpc.CallOption) (*ACLResp, error) { out := new(ACLResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolOverwriteACL", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolOverwriteACL_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -246,7 +322,7 @@ func (c *mgmtSvcClient) PoolOverwriteACL(ctx context.Context, in *ModifyACLReq, func (c *mgmtSvcClient) PoolUpdateACL(ctx context.Context, in *ModifyACLReq, opts ...grpc.CallOption) (*ACLResp, error) { out := new(ACLResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolUpdateACL", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolUpdateACL_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -255,7 +331,7 @@ func (c *mgmtSvcClient) PoolUpdateACL(ctx context.Context, in *ModifyACLReq, opt func (c *mgmtSvcClient) PoolDeleteACL(ctx context.Context, in *DeleteACLReq, opts ...grpc.CallOption) (*ACLResp, error) { out := new(ACLResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolDeleteACL", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolDeleteACL_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -264,7 +340,7 @@ func (c *mgmtSvcClient) PoolDeleteACL(ctx context.Context, in *DeleteACLReq, opt func (c *mgmtSvcClient) GetAttachInfo(ctx context.Context, in *GetAttachInfoReq, opts ...grpc.CallOption) (*GetAttachInfoResp, error) { out := new(GetAttachInfoResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/GetAttachInfo", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_GetAttachInfo_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -273,7 +349,7 @@ func (c *mgmtSvcClient) GetAttachInfo(ctx context.Context, in *GetAttachInfoReq, func (c *mgmtSvcClient) ListPools(ctx context.Context, in *ListPoolsReq, opts ...grpc.CallOption) (*ListPoolsResp, error) { out := new(ListPoolsResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/ListPools", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_ListPools_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -282,7 +358,7 @@ func (c *mgmtSvcClient) ListPools(ctx context.Context, in *ListPoolsReq, opts .. func (c *mgmtSvcClient) ListContainers(ctx context.Context, in *ListContReq, opts ...grpc.CallOption) (*ListContResp, error) { out := new(ListContResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/ListContainers", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_ListContainers_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -291,7 +367,7 @@ func (c *mgmtSvcClient) ListContainers(ctx context.Context, in *ListContReq, opt func (c *mgmtSvcClient) ContSetOwner(ctx context.Context, in *ContSetOwnerReq, opts ...grpc.CallOption) (*ContSetOwnerResp, error) { out := new(ContSetOwnerResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/ContSetOwner", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_ContSetOwner_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -300,7 +376,7 @@ func (c *mgmtSvcClient) ContSetOwner(ctx context.Context, in *ContSetOwnerReq, o func (c *mgmtSvcClient) SystemQuery(ctx context.Context, in *SystemQueryReq, opts ...grpc.CallOption) (*SystemQueryResp, error) { out := new(SystemQueryResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemQuery", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemQuery_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -309,7 +385,7 @@ func (c *mgmtSvcClient) SystemQuery(ctx context.Context, in *SystemQueryReq, opt func (c *mgmtSvcClient) SystemStop(ctx context.Context, in *SystemStopReq, opts ...grpc.CallOption) (*SystemStopResp, error) { out := new(SystemStopResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemStop", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemStop_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -318,7 +394,7 @@ func (c *mgmtSvcClient) SystemStop(ctx context.Context, in *SystemStopReq, opts func (c *mgmtSvcClient) SystemStart(ctx context.Context, in *SystemStartReq, opts ...grpc.CallOption) (*SystemStartResp, error) { out := new(SystemStartResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemStart", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemStart_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -327,7 +403,7 @@ func (c *mgmtSvcClient) SystemStart(ctx context.Context, in *SystemStartReq, opt func (c *mgmtSvcClient) SystemExclude(ctx context.Context, in *SystemExcludeReq, opts ...grpc.CallOption) (*SystemExcludeResp, error) { out := new(SystemExcludeResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemExclude", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemExclude_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -336,7 +412,7 @@ func (c *mgmtSvcClient) SystemExclude(ctx context.Context, in *SystemExcludeReq, func (c *mgmtSvcClient) SystemErase(ctx context.Context, in *SystemEraseReq, opts ...grpc.CallOption) (*SystemEraseResp, error) { out := new(SystemEraseResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemErase", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemErase_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -345,7 +421,79 @@ func (c *mgmtSvcClient) SystemErase(ctx context.Context, in *SystemEraseReq, opt func (c *mgmtSvcClient) SystemCleanup(ctx context.Context, in *SystemCleanupReq, opts ...grpc.CallOption) (*SystemCleanupResp, error) { out := new(SystemCleanupResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemCleanup", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCleanup_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckEnable(ctx context.Context, in *CheckEnableReq, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckEnable_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckDisable(ctx context.Context, in *CheckDisableReq, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckDisable_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckStart(ctx context.Context, in *CheckStartReq, opts ...grpc.CallOption) (*CheckStartResp, error) { + out := new(CheckStartResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckStart_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckStop(ctx context.Context, in *CheckStopReq, opts ...grpc.CallOption) (*CheckStopResp, error) { + out := new(CheckStopResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckStop_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckQuery(ctx context.Context, in *CheckQueryReq, opts ...grpc.CallOption) (*CheckQueryResp, error) { + out := new(CheckQueryResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckQuery_FullMethodName, in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckSetPolicy(ctx context.Context, in *CheckSetPolicyReq, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckSetPolicy_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckGetPolicy(ctx context.Context, in *CheckGetPolicyReq, opts ...grpc.CallOption) (*CheckGetPolicyResp, error) { + out := new(CheckGetPolicyResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckGetPolicy_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) SystemCheckRepair(ctx context.Context, in *CheckActReq, opts ...grpc.CallOption) (*CheckActResp, error) { + out := new(CheckActResp) + err := c.cc.Invoke(ctx, MgmtSvc_SystemCheckRepair_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -354,7 +502,7 @@ func (c *mgmtSvcClient) SystemCleanup(ctx context.Context, in *SystemCleanupReq, func (c *mgmtSvcClient) PoolUpgrade(ctx context.Context, in *PoolUpgradeReq, opts ...grpc.CallOption) (*PoolUpgradeResp, error) { out := new(PoolUpgradeResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/PoolUpgrade", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_PoolUpgrade_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -363,7 +511,7 @@ func (c *mgmtSvcClient) PoolUpgrade(ctx context.Context, in *PoolUpgradeReq, opt func (c *mgmtSvcClient) SystemSetAttr(ctx context.Context, in *SystemSetAttrReq, opts ...grpc.CallOption) (*DaosResp, error) { out := new(DaosResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemSetAttr", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemSetAttr_FullMethodName, in, out, opts...) 
if err != nil { return nil, err } @@ -372,7 +520,7 @@ func (c *mgmtSvcClient) SystemSetAttr(ctx context.Context, in *SystemSetAttrReq, func (c *mgmtSvcClient) SystemGetAttr(ctx context.Context, in *SystemGetAttrReq, opts ...grpc.CallOption) (*SystemGetAttrResp, error) { out := new(SystemGetAttrResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemGetAttr", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemGetAttr_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -381,7 +529,7 @@ func (c *mgmtSvcClient) SystemGetAttr(ctx context.Context, in *SystemGetAttrReq, func (c *mgmtSvcClient) SystemSetProp(ctx context.Context, in *SystemSetPropReq, opts ...grpc.CallOption) (*DaosResp, error) { out := new(DaosResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemSetProp", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemSetProp_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -390,7 +538,34 @@ func (c *mgmtSvcClient) SystemSetProp(ctx context.Context, in *SystemSetPropReq, func (c *mgmtSvcClient) SystemGetProp(ctx context.Context, in *SystemGetPropReq, opts ...grpc.CallOption) (*SystemGetPropResp, error) { out := new(SystemGetPropResp) - err := c.cc.Invoke(ctx, "/mgmt.MgmtSvc/SystemGetProp", in, out, opts...) + err := c.cc.Invoke(ctx, MgmtSvc_SystemGetProp_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) FaultInjectReport(ctx context.Context, in *chk.CheckReport, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_FaultInjectReport_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) FaultInjectPoolFault(ctx context.Context, in *chk.Fault, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_FaultInjectPoolFault_FullMethodName, in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *mgmtSvcClient) FaultInjectMgmtPoolFault(ctx context.Context, in *chk.Fault, opts ...grpc.CallOption) (*DaosResp, error) { + out := new(DaosResp) + err := c.cc.Invoke(ctx, MgmtSvc_FaultInjectMgmtPoolFault_FullMethodName, in, out, opts...) if err != nil { return nil, err } @@ -458,6 +633,22 @@ type MgmtSvcServer interface { SystemErase(context.Context, *SystemEraseReq) (*SystemEraseResp, error) // Clean up leaked resources for a given node SystemCleanup(context.Context, *SystemCleanupReq) (*SystemCleanupResp, error) + // Enable system check mode + SystemCheckEnable(context.Context, *CheckEnableReq) (*DaosResp, error) + // Disable system check mode + SystemCheckDisable(context.Context, *CheckDisableReq) (*DaosResp, error) + // Initiate a system check + SystemCheckStart(context.Context, *CheckStartReq) (*CheckStartResp, error) + // Stop a system check + SystemCheckStop(context.Context, *CheckStopReq) (*CheckStopResp, error) + // Query a system check + SystemCheckQuery(context.Context, *CheckQueryReq) (*CheckQueryResp, error) + // Set system check properties + SystemCheckSetPolicy(context.Context, *CheckSetPolicyReq) (*DaosResp, error) + // Query system check properties + SystemCheckGetPolicy(context.Context, *CheckGetPolicyReq) (*CheckGetPolicyResp, error) + // Send the desired action to repair an inconsistency. + SystemCheckRepair(context.Context, *CheckActReq) (*CheckActResp, error) // PoolUpgrade queries a DAOS pool. PoolUpgrade(context.Context, *PoolUpgradeReq) (*PoolUpgradeResp, error) // Set a system attribute or attributes. @@ -468,6 +659,12 @@ type MgmtSvcServer interface { SystemSetProp(context.Context, *SystemSetPropReq) (*DaosResp, error) // Get a system property or properties. SystemGetProp(context.Context, *SystemGetPropReq) (*SystemGetPropResp, error) + // Fault injection handlers are only implemented in non-release builds. + // FaultInjectReport injects a checker report. 
+ FaultInjectReport(context.Context, *chk.CheckReport) (*DaosResp, error) + // FaultInjectPoolFault creates a pool fault for testing the checker. + FaultInjectPoolFault(context.Context, *chk.Fault) (*DaosResp, error) + FaultInjectMgmtPoolFault(context.Context, *chk.Fault) (*DaosResp, error) mustEmbedUnimplementedMgmtSvcServer() } @@ -559,6 +756,30 @@ func (UnimplementedMgmtSvcServer) SystemErase(context.Context, *SystemEraseReq) func (UnimplementedMgmtSvcServer) SystemCleanup(context.Context, *SystemCleanupReq) (*SystemCleanupResp, error) { return nil, status.Errorf(codes.Unimplemented, "method SystemCleanup not implemented") } +func (UnimplementedMgmtSvcServer) SystemCheckEnable(context.Context, *CheckEnableReq) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckEnable not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckDisable(context.Context, *CheckDisableReq) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckDisable not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckStart(context.Context, *CheckStartReq) (*CheckStartResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckStart not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckStop(context.Context, *CheckStopReq) (*CheckStopResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckStop not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckQuery(context.Context, *CheckQueryReq) (*CheckQueryResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckQuery not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckSetPolicy(context.Context, *CheckSetPolicyReq) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckSetPolicy not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckGetPolicy(context.Context, *CheckGetPolicyReq) (*CheckGetPolicyResp, error) 
{ + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckGetPolicy not implemented") +} +func (UnimplementedMgmtSvcServer) SystemCheckRepair(context.Context, *CheckActReq) (*CheckActResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method SystemCheckRepair not implemented") +} func (UnimplementedMgmtSvcServer) PoolUpgrade(context.Context, *PoolUpgradeReq) (*PoolUpgradeResp, error) { return nil, status.Errorf(codes.Unimplemented, "method PoolUpgrade not implemented") } @@ -574,6 +795,15 @@ func (UnimplementedMgmtSvcServer) SystemSetProp(context.Context, *SystemSetPropR func (UnimplementedMgmtSvcServer) SystemGetProp(context.Context, *SystemGetPropReq) (*SystemGetPropResp, error) { return nil, status.Errorf(codes.Unimplemented, "method SystemGetProp not implemented") } +func (UnimplementedMgmtSvcServer) FaultInjectReport(context.Context, *chk.CheckReport) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method FaultInjectReport not implemented") +} +func (UnimplementedMgmtSvcServer) FaultInjectPoolFault(context.Context, *chk.Fault) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method FaultInjectPoolFault not implemented") +} +func (UnimplementedMgmtSvcServer) FaultInjectMgmtPoolFault(context.Context, *chk.Fault) (*DaosResp, error) { + return nil, status.Errorf(codes.Unimplemented, "method FaultInjectMgmtPoolFault not implemented") +} func (UnimplementedMgmtSvcServer) mustEmbedUnimplementedMgmtSvcServer() {} // UnsafeMgmtSvcServer may be embedded to opt out of forward compatibility for this service. 
@@ -597,7 +827,7 @@ func _MgmtSvc_Join_Handler(srv interface{}, ctx context.Context, dec func(interf } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/Join", + FullMethod: MgmtSvc_Join_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).Join(ctx, req.(*JoinReq)) @@ -615,7 +845,7 @@ func _MgmtSvc_ClusterEvent_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/ClusterEvent", + FullMethod: MgmtSvc_ClusterEvent_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).ClusterEvent(ctx, req.(*shared.ClusterEventReq)) @@ -633,7 +863,7 @@ func _MgmtSvc_LeaderQuery_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/LeaderQuery", + FullMethod: MgmtSvc_LeaderQuery_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).LeaderQuery(ctx, req.(*LeaderQueryReq)) @@ -651,7 +881,7 @@ func _MgmtSvc_PoolCreate_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolCreate", + FullMethod: MgmtSvc_PoolCreate_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolCreate(ctx, req.(*PoolCreateReq)) @@ -669,7 +899,7 @@ func _MgmtSvc_PoolDestroy_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolDestroy", + FullMethod: MgmtSvc_PoolDestroy_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolDestroy(ctx, req.(*PoolDestroyReq)) @@ -687,7 +917,7 @@ func _MgmtSvc_PoolEvict_Handler(srv interface{}, ctx context.Context, 
dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolEvict", + FullMethod: MgmtSvc_PoolEvict_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolEvict(ctx, req.(*PoolEvictReq)) @@ -705,7 +935,7 @@ func _MgmtSvc_PoolExclude_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolExclude", + FullMethod: MgmtSvc_PoolExclude_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolExclude(ctx, req.(*PoolExcludeReq)) @@ -723,7 +953,7 @@ func _MgmtSvc_PoolDrain_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolDrain", + FullMethod: MgmtSvc_PoolDrain_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolDrain(ctx, req.(*PoolDrainReq)) @@ -741,7 +971,7 @@ func _MgmtSvc_PoolExtend_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolExtend", + FullMethod: MgmtSvc_PoolExtend_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolExtend(ctx, req.(*PoolExtendReq)) @@ -759,7 +989,7 @@ func _MgmtSvc_PoolReintegrate_Handler(srv interface{}, ctx context.Context, dec } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolReintegrate", + FullMethod: MgmtSvc_PoolReintegrate_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolReintegrate(ctx, req.(*PoolReintegrateReq)) @@ -777,7 +1007,7 @@ func _MgmtSvc_PoolQuery_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: 
"/mgmt.MgmtSvc/PoolQuery", + FullMethod: MgmtSvc_PoolQuery_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolQuery(ctx, req.(*PoolQueryReq)) @@ -795,7 +1025,7 @@ func _MgmtSvc_PoolQueryTarget_Handler(srv interface{}, ctx context.Context, dec } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolQueryTarget", + FullMethod: MgmtSvc_PoolQueryTarget_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolQueryTarget(ctx, req.(*PoolQueryTargetReq)) @@ -813,7 +1043,7 @@ func _MgmtSvc_PoolSetProp_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolSetProp", + FullMethod: MgmtSvc_PoolSetProp_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolSetProp(ctx, req.(*PoolSetPropReq)) @@ -831,7 +1061,7 @@ func _MgmtSvc_PoolGetProp_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolGetProp", + FullMethod: MgmtSvc_PoolGetProp_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolGetProp(ctx, req.(*PoolGetPropReq)) @@ -849,7 +1079,7 @@ func _MgmtSvc_PoolGetACL_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolGetACL", + FullMethod: MgmtSvc_PoolGetACL_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolGetACL(ctx, req.(*GetACLReq)) @@ -867,7 +1097,7 @@ func _MgmtSvc_PoolOverwriteACL_Handler(srv interface{}, ctx context.Context, dec } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolOverwriteACL", + FullMethod: 
MgmtSvc_PoolOverwriteACL_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolOverwriteACL(ctx, req.(*ModifyACLReq)) @@ -885,7 +1115,7 @@ func _MgmtSvc_PoolUpdateACL_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolUpdateACL", + FullMethod: MgmtSvc_PoolUpdateACL_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolUpdateACL(ctx, req.(*ModifyACLReq)) @@ -903,7 +1133,7 @@ func _MgmtSvc_PoolDeleteACL_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolDeleteACL", + FullMethod: MgmtSvc_PoolDeleteACL_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolDeleteACL(ctx, req.(*DeleteACLReq)) @@ -921,7 +1151,7 @@ func _MgmtSvc_GetAttachInfo_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/GetAttachInfo", + FullMethod: MgmtSvc_GetAttachInfo_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).GetAttachInfo(ctx, req.(*GetAttachInfoReq)) @@ -939,7 +1169,7 @@ func _MgmtSvc_ListPools_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/ListPools", + FullMethod: MgmtSvc_ListPools_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).ListPools(ctx, req.(*ListPoolsReq)) @@ -957,7 +1187,7 @@ func _MgmtSvc_ListContainers_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/ListContainers", + FullMethod: MgmtSvc_ListContainers_FullMethodName, } 
handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).ListContainers(ctx, req.(*ListContReq)) @@ -975,7 +1205,7 @@ func _MgmtSvc_ContSetOwner_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/ContSetOwner", + FullMethod: MgmtSvc_ContSetOwner_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).ContSetOwner(ctx, req.(*ContSetOwnerReq)) @@ -993,7 +1223,7 @@ func _MgmtSvc_SystemQuery_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemQuery", + FullMethod: MgmtSvc_SystemQuery_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemQuery(ctx, req.(*SystemQueryReq)) @@ -1011,7 +1241,7 @@ func _MgmtSvc_SystemStop_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemStop", + FullMethod: MgmtSvc_SystemStop_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemStop(ctx, req.(*SystemStopReq)) @@ -1029,7 +1259,7 @@ func _MgmtSvc_SystemStart_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemStart", + FullMethod: MgmtSvc_SystemStart_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemStart(ctx, req.(*SystemStartReq)) @@ -1047,7 +1277,7 @@ func _MgmtSvc_SystemExclude_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemExclude", + FullMethod: MgmtSvc_SystemExclude_FullMethodName, } handler := func(ctx context.Context, req interface{}) 
(interface{}, error) { return srv.(MgmtSvcServer).SystemExclude(ctx, req.(*SystemExcludeReq)) @@ -1065,7 +1295,7 @@ func _MgmtSvc_SystemErase_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemErase", + FullMethod: MgmtSvc_SystemErase_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemErase(ctx, req.(*SystemEraseReq)) @@ -1083,7 +1313,7 @@ func _MgmtSvc_SystemCleanup_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemCleanup", + FullMethod: MgmtSvc_SystemCleanup_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemCleanup(ctx, req.(*SystemCleanupReq)) @@ -1091,6 +1321,150 @@ func _MgmtSvc_SystemCleanup_Handler(srv interface{}, ctx context.Context, dec fu return interceptor(ctx, in, info, handler) } +func _MgmtSvc_SystemCheckEnable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckEnableReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckEnable(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckEnable_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckEnable(ctx, req.(*CheckEnableReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckDisable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckDisableReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return 
srv.(MgmtSvcServer).SystemCheckDisable(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckDisable_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckDisable(ctx, req.(*CheckDisableReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckStart_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckStartReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckStart(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckStart_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckStart(ctx, req.(*CheckStartReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckStop_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckStopReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckStop(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckStop_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckStop(ctx, req.(*CheckStopReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckQuery_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckQueryReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckQuery(ctx, 
in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckQuery_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckQuery(ctx, req.(*CheckQueryReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckSetPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckSetPolicyReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckSetPolicy(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckSetPolicy_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckSetPolicy(ctx, req.(*CheckSetPolicyReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckGetPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckGetPolicyReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).SystemCheckGetPolicy(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckGetPolicy_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckGetPolicy(ctx, req.(*CheckGetPolicyReq)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_SystemCheckRepair_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CheckActReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return 
srv.(MgmtSvcServer).SystemCheckRepair(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_SystemCheckRepair_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).SystemCheckRepair(ctx, req.(*CheckActReq)) + } + return interceptor(ctx, in, info, handler) +} + func _MgmtSvc_PoolUpgrade_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(PoolUpgradeReq) if err := dec(in); err != nil { @@ -1101,7 +1475,7 @@ func _MgmtSvc_PoolUpgrade_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/PoolUpgrade", + FullMethod: MgmtSvc_PoolUpgrade_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).PoolUpgrade(ctx, req.(*PoolUpgradeReq)) @@ -1119,7 +1493,7 @@ func _MgmtSvc_SystemSetAttr_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemSetAttr", + FullMethod: MgmtSvc_SystemSetAttr_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemSetAttr(ctx, req.(*SystemSetAttrReq)) @@ -1137,7 +1511,7 @@ func _MgmtSvc_SystemGetAttr_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemGetAttr", + FullMethod: MgmtSvc_SystemGetAttr_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemGetAttr(ctx, req.(*SystemGetAttrReq)) @@ -1155,7 +1529,7 @@ func _MgmtSvc_SystemSetProp_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemSetProp", + FullMethod: 
MgmtSvc_SystemSetProp_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemSetProp(ctx, req.(*SystemSetPropReq)) @@ -1173,7 +1547,7 @@ func _MgmtSvc_SystemGetProp_Handler(srv interface{}, ctx context.Context, dec fu } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/mgmt.MgmtSvc/SystemGetProp", + FullMethod: MgmtSvc_SystemGetProp_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(MgmtSvcServer).SystemGetProp(ctx, req.(*SystemGetPropReq)) @@ -1181,6 +1555,60 @@ func _MgmtSvc_SystemGetProp_Handler(srv interface{}, ctx context.Context, dec fu return interceptor(ctx, in, info, handler) } +func _MgmtSvc_FaultInjectReport_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(chk.CheckReport) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).FaultInjectReport(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_FaultInjectReport_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).FaultInjectReport(ctx, req.(*chk.CheckReport)) + } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_FaultInjectPoolFault_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(chk.Fault) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).FaultInjectPoolFault(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_FaultInjectPoolFault_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).FaultInjectPoolFault(ctx, req.(*chk.Fault)) 
+ } + return interceptor(ctx, in, info, handler) +} + +func _MgmtSvc_FaultInjectMgmtPoolFault_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(chk.Fault) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MgmtSvcServer).FaultInjectMgmtPoolFault(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: MgmtSvc_FaultInjectMgmtPoolFault_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MgmtSvcServer).FaultInjectMgmtPoolFault(ctx, req.(*chk.Fault)) + } + return interceptor(ctx, in, info, handler) +} + // MgmtSvc_ServiceDesc is the grpc.ServiceDesc for MgmtSvc service. // It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -1300,6 +1728,38 @@ var MgmtSvc_ServiceDesc = grpc.ServiceDesc{ MethodName: "SystemCleanup", Handler: _MgmtSvc_SystemCleanup_Handler, }, + { + MethodName: "SystemCheckEnable", + Handler: _MgmtSvc_SystemCheckEnable_Handler, + }, + { + MethodName: "SystemCheckDisable", + Handler: _MgmtSvc_SystemCheckDisable_Handler, + }, + { + MethodName: "SystemCheckStart", + Handler: _MgmtSvc_SystemCheckStart_Handler, + }, + { + MethodName: "SystemCheckStop", + Handler: _MgmtSvc_SystemCheckStop_Handler, + }, + { + MethodName: "SystemCheckQuery", + Handler: _MgmtSvc_SystemCheckQuery_Handler, + }, + { + MethodName: "SystemCheckSetPolicy", + Handler: _MgmtSvc_SystemCheckSetPolicy_Handler, + }, + { + MethodName: "SystemCheckGetPolicy", + Handler: _MgmtSvc_SystemCheckGetPolicy_Handler, + }, + { + MethodName: "SystemCheckRepair", + Handler: _MgmtSvc_SystemCheckRepair_Handler, + }, { MethodName: "PoolUpgrade", Handler: _MgmtSvc_PoolUpgrade_Handler, @@ -1320,6 +1780,18 @@ var MgmtSvc_ServiceDesc = grpc.ServiceDesc{ MethodName: "SystemGetProp", Handler: _MgmtSvc_SystemGetProp_Handler, 
}, + { + MethodName: "FaultInjectReport", + Handler: _MgmtSvc_FaultInjectReport_Handler, + }, + { + MethodName: "FaultInjectPoolFault", + Handler: _MgmtSvc_FaultInjectPoolFault_Handler, + }, + { + MethodName: "FaultInjectMgmtPoolFault", + Handler: _MgmtSvc_FaultInjectMgmtPoolFault_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "mgmt/mgmt.proto", diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index ff9425c7d66..57180e24229 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -29,8 +29,9 @@ const ( type JoinResp_State int32 const ( - JoinResp_IN JoinResp_State = 0 // Server in the system. - JoinResp_OUT JoinResp_State = 1 // Server excluded from the system. + JoinResp_IN JoinResp_State = 0 // Server in the system. + JoinResp_OUT JoinResp_State = 1 // Server excluded from the system. + JoinResp_CHECK JoinResp_State = 2 // Server should start in checker mode. ) // Enum value maps for JoinResp_State. 
@@ -38,10 +39,12 @@ var ( JoinResp_State_name = map[int32]string{ 0: "IN", 1: "OUT", + 2: "CHECK", } JoinResp_State_value = map[string]int32{ - "IN": 0, - "OUT": 1, + "IN": 0, + "OUT": 1, + "CHECK": 2, } ) @@ -238,6 +241,7 @@ type JoinReq struct { Incarnation uint64 `protobuf:"varint,9,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // rank incarnation SecondaryUris []string `protobuf:"bytes,10,rep,name=secondary_uris,json=secondaryUris,proto3" json:"secondary_uris,omitempty"` // URIs for any secondary providers SecondaryNctxs []uint32 `protobuf:"varint,11,rep,packed,name=secondary_nctxs,json=secondaryNctxs,proto3" json:"secondary_nctxs,omitempty"` // CaRT context count for each secondary provider + CheckMode bool `protobuf:"varint,12,opt,name=check_mode,json=checkMode,proto3" json:"check_mode,omitempty"` // rank started in check mode } func (x *JoinReq) Reset() { @@ -349,6 +353,13 @@ func (x *JoinReq) GetSecondaryNctxs() []uint32 { return nil } +func (x *JoinReq) GetCheckMode() bool { + if x != nil { + return x.CheckMode + } + return false +} + type JoinResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1217,7 +1228,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x29, 0x0a, 0x0f, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x22, 0xab, 0x02, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x75, 0x73, 0x22, 0xca, 0x02, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, @@ 
-1236,112 +1247,115 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, 0x78, 0x73, - 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, - 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, - 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, - 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, - 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, - 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x0c, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x4d, 
0x6f, 0x64, 0x65, 0x22, + 0xe8, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, + 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, + 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, + 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, + 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x23, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x06, + 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, + 0x09, 0x0a, 0x05, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x10, 0x02, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, + 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, + 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, + 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, + 0x6e, 0x74, 
0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, + 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, + 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x77, + 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, - 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x73, 0x22, 0x77, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, - 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 
0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, - 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, - 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, - 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0xb1, 0x02, - 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, - 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, - 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, - 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, - 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, - 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, - 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, - 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, - 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, - 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, - 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, - 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, - 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, - 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 
0x18, - 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x12, 0x21, - 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x09, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, - 0x78, 0x22, 0x88, 0x04, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, - 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, - 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, - 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, - 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, - 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, - 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, - 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, - 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, - 0x73, 0x18, 0x07, 0x20, 0x03, 
0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, - 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, - 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, - 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, - 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, - 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, - 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x6d, 0x0a, - 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, - 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x21, - 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, - 0x78, 0x12, 0x19, 0x0a, 0x08, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x74, 0x78, 0x73, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, 0x75, 0x6d, 0x43, 0x74, 0x78, 0x73, 0x22, 0x25, 0x0a, 0x0f, - 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, - 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, - 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, 0x53, 
0x65, 0x74, 0x52, 0x61, 0x6e, - 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, - 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, - 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, - 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, - 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, - 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, - 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, - 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, - 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, + 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, + 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 
0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0xb1, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, + 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, + 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, + 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, + 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, + 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, + 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, + 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, + 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, + 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, + 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, 
0x88, 0x04, 0x0a, 0x11, + 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, + 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, + 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, + 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, + 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, + 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, + 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, + 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, + 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, + 0x72, 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, + 0x6b, 0x55, 
0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, + 0x72, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, + 0x6e, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, + 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, + 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x6d, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, + 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, + 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x12, 0x19, 0x0a, 0x08, 0x6e, + 0x75, 0x6d, 0x5f, 0x63, 0x74, 0x78, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, + 0x75, 0x6d, 0x43, 0x74, 0x78, 0x73, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, + 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, + 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x22, 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, + 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 
0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, + 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, + 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, + 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, + 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, + 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, + 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, + 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, + 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, + 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/mgmt/system.pb.go b/src/control/common/proto/mgmt/system.pb.go index eb250dca2ba..9a23ad39fde 100644 --- a/src/control/common/proto/mgmt/system.pb.go +++ b/src/control/common/proto/mgmt/system.pb.go @@ -316,9 +316,10 @@ type SystemStartReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system name - Ranks string `protobuf:"bytes,2,opt,name=ranks,proto3" json:"ranks,omitempty"` // rankset to query - Hosts string `protobuf:"bytes,3,opt,name=hosts,proto3" json:"hosts,omitempty"` // hostset to query + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS 
system name + Ranks string `protobuf:"bytes,2,opt,name=ranks,proto3" json:"ranks,omitempty"` // rankset to query + Hosts string `protobuf:"bytes,3,opt,name=hosts,proto3" json:"hosts,omitempty"` // hostset to query + CheckMode bool `protobuf:"varint,4,opt,name=check_mode,json=checkMode,proto3" json:"check_mode,omitempty"` // start ranks in check mode } func (x *SystemStartReq) Reset() { @@ -374,6 +375,13 @@ func (x *SystemStartReq) GetHosts() string { return "" } +func (x *SystemStartReq) GetCheckMode() bool { + if x != nil { + return x.CheckMode + } + return false +} + // SystemStartResp returns status of restart attempt and results // of attempts to start system members. type SystemStartResp struct { @@ -1347,126 +1355,128 @@ var file_mgmt_system_proto_rawDesc = []byte{ 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, - 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x4e, 0x0a, 0x0e, 0x53, 0x79, 0x73, + 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x6d, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, - 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 
0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, - 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, - 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, - 0x66, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, - 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, - 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, - 0x73, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x05, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x22, 0x41, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, - 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, + 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x68, 0x65, + 0x63, 0x6b, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x63, + 0x68, 0x65, 0x63, 0x6b, 0x4d, 0x6f, 0x64, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x6d, 0x0a, 0x0e, 0x53, 0x79, - 
0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, - 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, - 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x09, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x4d, 0x61, 0x73, 0x6b, 0x22, 0xc4, 0x01, 0x0a, 0x0f, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, - 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x6d, 0x62, - 0x65, 0x72, 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, - 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, - 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, - 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x73, - 0x22, 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, + 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 
0x75, 0x6c, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, + 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, + 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x66, + 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, - 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, - 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, - 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, - 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, - 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x3f, 0x0a, 0x07, 0x72, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, - 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, 
0x68, 0x0a, 0x0d, - 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, 0x67, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, - 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x6f, 0x6f, 0x6c, 0x49, 0x64, - 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0xab, 0x01, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, - 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x46, 0x0a, - 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, - 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, - 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, - 0x62, 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, - 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, - 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, - 0x79, 0x73, 0x18, 
0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x22, 0x9b, - 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, - 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, - 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x73, - 0x70, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, - 0x0f, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, - 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xab, 0x01, 0x0a, - 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, + 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, + 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x05, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x22, 0x41, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, + 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 
0x22, 0x6d, 0x0a, 0x0e, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, + 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x74, 0x61, + 0x74, 0x65, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x09, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x4d, 0x61, 0x73, 0x6b, 0x22, 0xc4, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, + 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x6d, 0x62, 0x65, + 0x72, 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, + 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, + 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, 0x21, + 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x73, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x09, 0x52, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x73, 0x22, + 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 
0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x2e, - 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, - 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, - 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, - 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, - 0x6b, 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, - 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, - 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, - 0x69, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, - 0x73, 0x45, 0x6e, 0x74, 0x72, 
0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, - 0x38, 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, - 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, + 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, + 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, + 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, + 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, + 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, + 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x3f, 0x0a, 0x07, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, + 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x52, 0x07, 0x72, 
0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, 0x68, 0x0a, 0x0d, 0x43, + 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x16, 0x0a, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x69, + 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x6f, 0x6f, 0x6c, 0x49, 0x64, 0x12, + 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0xab, 0x01, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, + 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, + 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, + 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, + 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, + 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, + 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 
0x0a, 0x04, 0x6b, 0x65, 0x79, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, + 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x73, 0x70, + 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, + 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, + 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xab, 0x01, 0x0a, 0x10, + 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, + 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, + 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x2e, 0x50, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, + 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, + 0x14, 
0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, + 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, + 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, + 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, + 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, + 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, + 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/srv/srv.pb.go b/src/control/common/proto/srv/srv.pb.go index 
4ccb7b0fcc3..ae26aad0aad 100644 --- a/src/control/common/proto/srv/srv.pb.go +++ b/src/control/common/proto/srv/srv.pb.go @@ -15,6 +15,7 @@ package srv import ( + chk "github.com/daos-stack/daos/src/control/common/proto/chk" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" @@ -41,6 +42,7 @@ type NotifyReadyReq struct { Incarnation uint64 `protobuf:"varint,6,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // HLC incarnation number SecondaryUris []string `protobuf:"bytes,7,rep,name=secondaryUris,proto3" json:"secondaryUris,omitempty"` // secondary CaRT URIs SecondaryNctxs []uint32 `protobuf:"varint,8,rep,packed,name=secondaryNctxs,proto3" json:"secondaryNctxs,omitempty"` // number of CaRT contexts for each secondary provider + CheckMode bool `protobuf:"varint,9,opt,name=check_mode,json=checkMode,proto3" json:"check_mode,omitempty"` // True if engine started in checker mode } func (x *NotifyReadyReq) Reset() { @@ -131,6 +133,13 @@ func (x *NotifyReadyReq) GetSecondaryNctxs() []uint32 { return nil } +func (x *NotifyReadyReq) GetCheckMode() bool { + if x != nil { + return x.CheckMode + } + return false +} + type GetPoolSvcReq struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -343,47 +352,558 @@ func (x *PoolFindByLabelResp) GetSvcreps() []uint32 { return nil } +// List all the known pools from MS. 
+type CheckListPoolReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *CheckListPoolReq) Reset() { + *x = CheckListPoolReq{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckListPoolReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckListPoolReq) ProtoMessage() {} + +func (x *CheckListPoolReq) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckListPoolReq.ProtoReflect.Descriptor instead. +func (*CheckListPoolReq) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{5} +} + +type CheckListPoolResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. + Pools []*CheckListPoolResp_OnePool `protobuf:"bytes,2,rep,name=pools,proto3" json:"pools,omitempty"` // The list of pools. 
+} + +func (x *CheckListPoolResp) Reset() { + *x = CheckListPoolResp{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckListPoolResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckListPoolResp) ProtoMessage() {} + +func (x *CheckListPoolResp) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckListPoolResp.ProtoReflect.Descriptor instead. +func (*CheckListPoolResp) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{6} +} + +func (x *CheckListPoolResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +func (x *CheckListPoolResp) GetPools() []*CheckListPoolResp_OnePool { + if x != nil { + return x.Pools + } + return nil +} + +// Register pool to MS. +type CheckRegPoolReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Seq uint64 `protobuf:"varint,1,opt,name=seq,proto3" json:"seq,omitempty"` // DAOS Check event sequence, unique for the instance. + Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // Pool UUID. + Label string `protobuf:"bytes,3,opt,name=label,proto3" json:"label,omitempty"` // Pool label. + Svcreps []uint32 `protobuf:"varint,4,rep,packed,name=svcreps,proto3" json:"svcreps,omitempty"` // Pool service replica ranks. 
+} + +func (x *CheckRegPoolReq) Reset() { + *x = CheckRegPoolReq{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckRegPoolReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckRegPoolReq) ProtoMessage() {} + +func (x *CheckRegPoolReq) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckRegPoolReq.ProtoReflect.Descriptor instead. +func (*CheckRegPoolReq) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{7} +} + +func (x *CheckRegPoolReq) GetSeq() uint64 { + if x != nil { + return x.Seq + } + return 0 +} + +func (x *CheckRegPoolReq) GetUuid() string { + if x != nil { + return x.Uuid + } + return "" +} + +func (x *CheckRegPoolReq) GetLabel() string { + if x != nil { + return x.Label + } + return "" +} + +func (x *CheckRegPoolReq) GetSvcreps() []uint32 { + if x != nil { + return x.Svcreps + } + return nil +} + +type CheckRegPoolResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. 
+} + +func (x *CheckRegPoolResp) Reset() { + *x = CheckRegPoolResp{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckRegPoolResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckRegPoolResp) ProtoMessage() {} + +func (x *CheckRegPoolResp) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckRegPoolResp.ProtoReflect.Descriptor instead. +func (*CheckRegPoolResp) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{8} +} + +func (x *CheckRegPoolResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +// Deregister pool from MS. +type CheckDeregPoolReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Seq uint64 `protobuf:"varint,1,opt,name=seq,proto3" json:"seq,omitempty"` // DAOS Check event sequence, unique for the instance. + Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // The pool to be deregistered. 
+} + +func (x *CheckDeregPoolReq) Reset() { + *x = CheckDeregPoolReq{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckDeregPoolReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckDeregPoolReq) ProtoMessage() {} + +func (x *CheckDeregPoolReq) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckDeregPoolReq.ProtoReflect.Descriptor instead. +func (*CheckDeregPoolReq) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{9} +} + +func (x *CheckDeregPoolReq) GetSeq() uint64 { + if x != nil { + return x.Seq + } + return 0 +} + +func (x *CheckDeregPoolReq) GetUuid() string { + if x != nil { + return x.Uuid + } + return "" +} + +type CheckDeregPoolResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. 
+} + +func (x *CheckDeregPoolResp) Reset() { + *x = CheckDeregPoolResp{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckDeregPoolResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckDeregPoolResp) ProtoMessage() {} + +func (x *CheckDeregPoolResp) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckDeregPoolResp.ProtoReflect.Descriptor instead. +func (*CheckDeregPoolResp) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{10} +} + +func (x *CheckDeregPoolResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +type CheckReportReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Report *chk.CheckReport `protobuf:"bytes,1,opt,name=report,proto3" json:"report,omitempty"` // Report payload +} + +func (x *CheckReportReq) Reset() { + *x = CheckReportReq{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckReportReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckReportReq) ProtoMessage() {} + +func (x *CheckReportReq) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckReportReq.ProtoReflect.Descriptor instead. 
+func (*CheckReportReq) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{11} +} + +func (x *CheckReportReq) GetReport() *chk.CheckReport { + if x != nil { + return x.Report + } + return nil +} + +type CheckReportResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code. +} + +func (x *CheckReportResp) Reset() { + *x = CheckReportResp{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckReportResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckReportResp) ProtoMessage() {} + +func (x *CheckReportResp) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckReportResp.ProtoReflect.Descriptor instead. +func (*CheckReportResp) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{12} +} + +func (x *CheckReportResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +type CheckListPoolResp_OnePool struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Uuid string `protobuf:"bytes,1,opt,name=uuid,proto3" json:"uuid,omitempty"` // Pool UUID. + Label string `protobuf:"bytes,2,opt,name=label,proto3" json:"label,omitempty"` // Pool label. + Svcreps []uint32 `protobuf:"varint,3,rep,packed,name=svcreps,proto3" json:"svcreps,omitempty"` // Pool service replica ranks. 
+} + +func (x *CheckListPoolResp_OnePool) Reset() { + *x = CheckListPoolResp_OnePool{} + if protoimpl.UnsafeEnabled { + mi := &file_srv_srv_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CheckListPoolResp_OnePool) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CheckListPoolResp_OnePool) ProtoMessage() {} + +func (x *CheckListPoolResp_OnePool) ProtoReflect() protoreflect.Message { + mi := &file_srv_srv_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CheckListPoolResp_OnePool.ProtoReflect.Descriptor instead. +func (*CheckListPoolResp_OnePool) Descriptor() ([]byte, []int) { + return file_srv_srv_proto_rawDescGZIP(), []int{6, 0} +} + +func (x *CheckListPoolResp_OnePool) GetUuid() string { + if x != nil { + return x.Uuid + } + return "" +} + +func (x *CheckListPoolResp_OnePool) GetLabel() string { + if x != nil { + return x.Label + } + return "" +} + +func (x *CheckListPoolResp_OnePool) GetSvcreps() []uint32 { + if x != nil { + return x.Svcreps + } + return nil +} + var File_srv_srv_proto protoreflect.FileDescriptor var file_srv_srv_proto_rawDesc = []byte{ 0x0a, 0x0d, 0x73, 0x72, 0x76, 0x2f, 0x73, 0x72, 0x76, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x03, 0x73, 0x72, 0x76, 0x22, 0x8c, 0x02, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, - 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x63, 0x74, - 0x78, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x12, - 0x2a, 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, - 0x6f, 0x63, 0x6b, 0x18, 0x03, 0x20, 0x01, 
0x28, 0x09, 0x52, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, - 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x12, 0x20, 0x0a, 0x0b, 0x69, - 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x12, 0x14, 0x0a, - 0x05, 0x6e, 0x74, 0x67, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x74, - 0x67, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, - 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, - 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x12, 0x26, 0x0a, 0x0e, 0x73, - 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x08, 0x20, - 0x03, 0x28, 0x0d, 0x52, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, - 0x74, 0x78, 0x73, 0x22, 0x23, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, - 0x63, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, - 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, - 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x01, 
0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, - 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, - 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, - 0x63, 0x72, 0x65, 0x70, 0x73, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, - 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, - 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x03, 0x73, 0x72, 0x76, 0x1a, 0x0d, 0x63, 0x68, 0x6b, 0x2f, 0x63, 0x68, 0x6b, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x22, 0xab, 0x02, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, 0x65, + 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x63, 0x74, 0x78, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x12, 0x2a, + 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, + 0x63, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, + 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, + 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 
0x78, 0x12, 0x14, 0x0a, 0x05, + 0x6e, 0x74, 0x67, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x74, 0x67, + 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, + 0x79, 0x55, 0x72, 0x69, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, 0x63, + 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x12, 0x26, 0x0a, 0x0e, 0x73, 0x65, + 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x08, 0x20, 0x03, + 0x28, 0x0d, 0x52, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, + 0x78, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x5f, 0x6d, 0x6f, 0x64, 0x65, + 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x4d, 0x6f, 0x64, + 0x65, 0x22, 0x23, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, + 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, + 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, 0x50, 0x6f, + 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x71, + 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, + 0x6e, 
0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, + 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, + 0x65, 0x70, 0x73, 0x22, 0x12, 0x0a, 0x10, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x4c, 0x69, 0x73, 0x74, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x71, 0x22, 0xb0, 0x01, 0x0a, 0x11, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x34, 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x73, 0x72, 0x76, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4f, 0x6e, 0x65, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x1a, 0x4d, 0x0a, 0x07, 0x4f, + 0x6e, 0x65, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x52, 0x65, 0x67, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x03, 0x73, 0x65, 0x71, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x73, 0x65, 0x71, 0x12, + 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, + 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, + 0x72, 0x65, 0x70, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, + 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x10, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x67, 0x50, + 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, + 0x39, 0x0a, 0x11, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x44, 0x65, 0x72, 0x65, 0x67, 0x50, 0x6f, 0x6f, + 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x65, 0x71, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x73, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0x2c, 0x0a, 0x12, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x44, 0x65, 0x72, 0x65, 0x67, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x3a, 0x0a, 0x0e, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x28, 0x0a, 0x06, 0x72, 0x65, + 0x70, 0x6f, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x63, 0x68, 0x6b, + 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x06, 0x72, 0x65, + 0x70, 0x6f, 0x72, 0x74, 0x22, 0x29, 0x0a, 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x70, + 0x6f, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x42, + 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 
0x2f, 0x64, 0x61, + 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, + 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, + 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, } var ( @@ -398,20 +918,32 @@ func file_srv_srv_proto_rawDescGZIP() []byte { return file_srv_srv_proto_rawDescData } -var file_srv_srv_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_srv_srv_proto_msgTypes = make([]protoimpl.MessageInfo, 14) var file_srv_srv_proto_goTypes = []interface{}{ - (*NotifyReadyReq)(nil), // 0: srv.NotifyReadyReq - (*GetPoolSvcReq)(nil), // 1: srv.GetPoolSvcReq - (*GetPoolSvcResp)(nil), // 2: srv.GetPoolSvcResp - (*PoolFindByLabelReq)(nil), // 3: srv.PoolFindByLabelReq - (*PoolFindByLabelResp)(nil), // 4: srv.PoolFindByLabelResp + (*NotifyReadyReq)(nil), // 0: srv.NotifyReadyReq + (*GetPoolSvcReq)(nil), // 1: srv.GetPoolSvcReq + (*GetPoolSvcResp)(nil), // 2: srv.GetPoolSvcResp + (*PoolFindByLabelReq)(nil), // 3: srv.PoolFindByLabelReq + (*PoolFindByLabelResp)(nil), // 4: srv.PoolFindByLabelResp + (*CheckListPoolReq)(nil), // 5: srv.CheckListPoolReq + (*CheckListPoolResp)(nil), // 6: srv.CheckListPoolResp + (*CheckRegPoolReq)(nil), // 7: srv.CheckRegPoolReq + (*CheckRegPoolResp)(nil), // 8: srv.CheckRegPoolResp + (*CheckDeregPoolReq)(nil), // 9: srv.CheckDeregPoolReq + (*CheckDeregPoolResp)(nil), // 10: srv.CheckDeregPoolResp + (*CheckReportReq)(nil), // 11: srv.CheckReportReq + (*CheckReportResp)(nil), // 12: srv.CheckReportResp + (*CheckListPoolResp_OnePool)(nil), // 13: srv.CheckListPoolResp.OnePool + (*chk.CheckReport)(nil), // 14: chk.CheckReport } var file_srv_srv_proto_depIdxs = []int32{ - 0, // [0:0] is the sub-list for method output_type - 0, // [0:0] is the sub-list for method input_type - 0, // [0:0] is the sub-list for extension type_name - 0, // [0:0] is the sub-list for extension extendee - 0, 
// [0:0] is the sub-list for field type_name + 13, // 0: srv.CheckListPoolResp.pools:type_name -> srv.CheckListPoolResp.OnePool + 14, // 1: srv.CheckReportReq.report:type_name -> chk.CheckReport + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name } func init() { file_srv_srv_proto_init() } @@ -480,6 +1012,114 @@ func file_srv_srv_proto_init() { return nil } } + file_srv_srv_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckListPoolReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckListPoolResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckRegPoolReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckRegPoolResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckDeregPoolReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckDeregPoolResp); i { + case 0: + return &v.state 
+ case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckReportReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckReportResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_srv_srv_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CheckListPoolResp_OnePool); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -487,7 +1127,7 @@ func file_srv_srv_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_srv_srv_proto_rawDesc, NumEnums: 0, - NumMessages: 5, + NumMessages: 14, NumExtensions: 0, NumServices: 0, }, diff --git a/src/control/drpc/drpc_server.go b/src/control/drpc/drpc_server.go index 35f85ef5758..bf352ab3a19 100644 --- a/src/control/drpc/drpc_server.go +++ b/src/control/drpc/drpc_server.go @@ -24,7 +24,7 @@ import ( // we need to restrict the maximum message size so we can preallocate a // buffer to put all of the information in. Corresponding C definition is // found in include/daos/drpc.h -const MaxMsgSize = 1 << 17 +const MaxMsgSize = 1 << 20 // DomainSocketServer is the object that listens for incoming dRPC connections, // maintains the connections for sessions, and manages the message processing. 
diff --git a/src/control/drpc/modules.go b/src/control/drpc/modules.go index 5fd25a27334..a81192ff2a3 100644 --- a/src/control/drpc/modules.go +++ b/src/control/drpc/modules.go @@ -240,6 +240,16 @@ const ( MethodNotifyExit MgmtMethod = C.DRPC_METHOD_MGMT_NOTIFY_EXIT // MethodPoolGetProp defines a method for getting pool properties MethodPoolGetProp MgmtMethod = C.DRPC_METHOD_MGMT_POOL_GET_PROP + // MethodCheckerStart defines a method for starting the checker + MethodCheckerStart MgmtMethod = C.DRPC_METHOD_MGMT_CHK_START + // MethodCheckerStop defines a method for stopping the checker + MethodCheckerStop MgmtMethod = C.DRPC_METHOD_MGMT_CHK_STOP + // MethodCheckerQuery defines a method for getting the checker status + MethodCheckerQuery MgmtMethod = C.DRPC_METHOD_MGMT_CHK_QUERY + // MethodCheckerProp defines a method for getting the checker properties + MethodCheckerProp MgmtMethod = C.DRPC_METHOD_MGMT_CHK_PROP + // MethodCheckerAction defines a method for specifying a checker action + MethodCheckerAction MgmtMethod = C.DRPC_METHOD_MGMT_CHK_ACT // MethodPoolUpgrade defines a method for upgrade pool MethodPoolUpgrade MgmtMethod = C.DRPC_METHOD_MGMT_POOL_UPGRADE // MethodLedManage defines a method to manage a VMD device LED state @@ -287,6 +297,14 @@ const ( MethodPoolFindByLabel srvMethod = C.DRPC_METHOD_SRV_POOL_FIND_BYLABEL // MethodClusterEvent notifies of a cluster event in the I/O Engine. 
MethodClusterEvent srvMethod = C.DRPC_METHOD_SRV_CLUSTER_EVENT + // MethodCheckerListPools requests the list of pools from the MS + MethodCheckerListPools srvMethod = C.DRPC_METHOD_CHK_LIST_POOL + // MethodCheckerRegisterPool registers a pool with the MS + MethodCheckerRegisterPool srvMethod = C.DRPC_METHOD_CHK_REG_POOL + // MethodCheckerDeregisterPool deregisters a pool with the MS + MethodCheckerDeregisterPool srvMethod = C.DRPC_METHOD_CHK_DEREG_POOL + // MethodCheckerReport reports a checker finding to the MS + MethodCheckerReport srvMethod = C.DRPC_METHOD_CHK_REPORT ) type securityMethod int32 diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index f444453c6b7..a6389a862bf 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -214,3 +214,11 @@ const ( ControlMetadataUnknown Code = iota + 1000 ControlMetadataBadFilesystem ) + +// System Checker codes +const ( + SystemCheckerUnknown Code = iota + 1100 + SystemCheckerInvalidMemberStates + SystemCheckerNotEnabled + SystemCheckerEnabled +) diff --git a/src/control/go.mod b/src/control/go.mod index a242783e459..17c0702f6fb 100644 --- a/src/control/go.mod +++ b/src/control/go.mod @@ -4,6 +4,7 @@ go 1.17 require ( github.com/Jille/raft-grpc-transport v1.2.0 + github.com/desertbit/grumble v1.1.3 github.com/dustin/go-humanize v1.0.0 github.com/google/go-cmp v0.5.9 github.com/google/uuid v1.3.0 @@ -29,10 +30,16 @@ require ( github.com/armon/go-metrics v0.4.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/desertbit/closer/v3 v3.1.2 // indirect + github.com/desertbit/columnize v2.1.0+incompatible // indirect + github.com/desertbit/go-shlex v0.1.1 // indirect + github.com/desertbit/readline v1.5.1 // indirect github.com/fatih/color v1.13.0 // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-immutable-radix v1.3.1 // 
indirect github.com/hashicorp/go-msgpack v1.1.5 // indirect + github.com/hashicorp/go-multierror v1.1.0 // indirect github.com/hashicorp/go-uuid v1.0.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/mattn/go-colorable v0.1.12 // indirect diff --git a/src/control/go.sum b/src/control/go.sum index 29241f3b195..c7534008e79 100644 --- a/src/control/go.sum +++ b/src/control/go.sum @@ -601,6 +601,8 @@ github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3 github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Jille/raft-grpc-transport v1.2.0 h1:W/YSPz8IsirEyomjKmDog5Xk71o9+l4KhyMEX2TsgSs= github.com/Jille/raft-grpc-transport v1.2.0/go.mod h1:GQGUXJfjlzwA390Ox1AyVYpjCLhtGd6yqY9Sb5hpQfc= +github.com/Netflix/go-expect v0.0.0-20180615182759-c93bf25de8e8/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= +github.com/Netflix/go-expect v0.0.0-20190729225929-0e00d9168667/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= @@ -634,10 +636,12 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= @@ -652,6 +656,20 @@ github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/desertbit/closer/v3 v3.1.2 h1:a6+2DmwIcNygW04XXWYq+Qp2X9uIk9QbZCP9//qEkb0= +github.com/desertbit/closer/v3 v3.1.2/go.mod h1:AAC4KRd8DC40nwvV967J/kDFhujMEiuwIKQfN0IDxXw= +github.com/desertbit/columnize v2.1.0+incompatible h1:h55rYmdrWoTj7w9aAnCkxzM3C2Eb8zuFa2W41t0o5j0= +github.com/desertbit/columnize v2.1.0+incompatible/go.mod h1:5kPrzQwKbQ8E5D28nvTVPqIBJyj+8jvJzwt6HXZvXgI= +github.com/desertbit/go-shlex v0.1.1 h1:c65HnbgX1QyC6kPL1dMzUpZ4puNUE6ai/eVucWNLNsk= +github.com/desertbit/go-shlex v0.1.1/go.mod 
h1:Qbb+mJNud5AypgHZ81EL8syOGaWlwvAOTqS7XmWI4pQ= +github.com/desertbit/grumble v1.1.3 h1:gbdgVGWsHmNraJ7Gn6Q4TiUEIHU/UHfbc1KUSbBlgYU= +github.com/desertbit/grumble v1.1.3/go.mod h1:r7j3ShNy5EmOsegRD2DzTutIaGiLiA3M5yBTXXeLwcs= +github.com/desertbit/readline v1.5.1 h1:/wOIZkWYl1s+IvJm/5bOknfUgs6MhS9svRNZpFM53Os= +github.com/desertbit/readline v1.5.1/go.mod h1:pHQgTsCFs9Cpfh5mlSUFi9Xa5kkL4d8L1Jo4UVWzPw0= github.com/cncf/xds/go v0.0.0-20220314180256-7f1daf1720fc/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= @@ -678,9 +696,12 @@ github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ= +github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -807,6 
+828,9 @@ github.com/googleapis/gax-go/v2 v2.7.1/go.mod h1:4orTrqY6hXxxaUL4LHIPl6lGo8vAE38 github.com/googleapis/go-type-adapters v1.0.0/go.mod h1:zHW75FOG2aur7gAO2B+MLby+cLsWGBF62rFAi7WjWO4= github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3/go.mod h1:o//XUCC/F+yRGJoPO/VU0GSB0f8Nhgmxx0VIRUvaC0w= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= @@ -820,6 +844,9 @@ github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjh github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs= github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI= +github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE= @@ -834,6 +861,8 @@ github.com/hashicorp/raft v1.3.9 
h1:9yuo1aR0bFTr1cw7pj3S2Bk6MhJCsnr2NAxvIBrP2x4= github.com/hashicorp/raft v1.3.9/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= github.com/hashicorp/raft-boltdb/v2 v2.0.0-20210409134258-03c10cc3d4ea h1:pXD01QLdHmn4Ij82g1vksWbZXwSH6il7Svrm/rdUk18= github.com/hashicorp/raft-boltdb/v2 v2.0.0-20210409134258-03c10cc3d4ea/go.mod h1:kiPs9g148eLShc2TYagUAyKDnD+dH9U+CQKsXzlY9xo= +github.com/hinshun/vt10x v0.0.0-20180616224451-1954e6464174/go.mod h1:DqJ97dSdRW1W22yXSB90986pcOyQ7r45iio1KN2ez1A= +github.com/hinshun/vt10x v0.0.0-20180809195222-d55458df857c/go.mod h1:DqJ97dSdRW1W22yXSB90986pcOyQ7r45iio1KN2ez1A= github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= @@ -866,8 +895,10 @@ github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfn github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA= @@ -878,6 +909,7 @@ github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope 
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -888,6 +920,7 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= @@ -899,6 +932,7 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nbutton23/zxcvbn-go v0.0.0-20180912185939-ae427f1e4c1d/go.mod 
h1:o96djdrsSGy3AWPyBgZMAGfxZNfgntdJG+11KU4QvbU= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= @@ -960,6 +994,7 @@ github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z github.com/spf13/afero v1.9.2/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcDf8Y= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -999,9 +1034,11 @@ go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqe go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190123085648-057139ce5d2b/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto 
v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -1173,6 +1210,7 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180606202747-9527bec2660b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1212,7 +1250,9 @@ golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201214210602-f9fddec55a1e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1635,6 +1675,7 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/AlecAivazis/survey.v1 v1.8.5/go.mod h1:iBNOmqKz/NUbZx3bA+4hAGLRC7fSK7tgtVDT4tB22XA= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.29.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= diff --git a/src/control/lib/control/check.go b/src/control/lib/control/check.go new file mode 100644 index 00000000000..af50f17bacb --- /dev/null +++ b/src/control/lib/control/check.go @@ -0,0 +1,727 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package control + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/pkg/errors" + "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/common" + pbutil "github.com/daos-stack/daos/src/control/common/proto" + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +type SystemCheckEnableReq struct { + unaryRequest + msRequest + + mgmtpb.CheckEnableReq +} + +// SystemCheckEnable enables the system checker. 
+func SystemCheckEnable(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckEnableReq) error { + if req == nil { + return errors.Errorf("nil %T", req) + } + + req.CheckEnableReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckEnable(ctx, &req.CheckEnableReq) + }) + + rpcClient.Debugf("DAOS system checker enable request: %s", pbutil.Debug(&req.CheckEnableReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} + +type SystemCheckDisableReq struct { + unaryRequest + msRequest + + mgmtpb.CheckDisableReq +} + +// SystemCheckDisable disables the system checker. +func SystemCheckDisable(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckDisableReq) error { + if req == nil { + return errors.Errorf("nil %T", req) + } + + req.CheckDisableReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckDisable(ctx, &req.CheckDisableReq) + }) + + rpcClient.Debugf("DAOS system checker disable request: %s", pbutil.Debug(&req.CheckDisableReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} + +const ( + SystemCheckFlagDryRun = SystemCheckFlags(chkpb.CheckFlag_CF_DRYRUN) + SystemCheckFlagReset = SystemCheckFlags(chkpb.CheckFlag_CF_RESET) + SystemCheckFlagFailout = SystemCheckFlags(chkpb.CheckFlag_CF_FAILOUT) + SystemCheckFlagAuto = SystemCheckFlags(chkpb.CheckFlag_CF_AUTO) + SystemCheckFlagFindOrphans = SystemCheckFlags(chkpb.CheckFlag_CF_ORPHAN_POOL) + SystemCheckFlagDisableFailout = SystemCheckFlags(chkpb.CheckFlag_CF_NO_FAILOUT) + SystemCheckFlagDisableAuto = SystemCheckFlags(chkpb.CheckFlag_CF_NO_AUTO) + + incClassPrefix = "CIC_" + incActionPrefix = "CIA_" + incStatusPrefix = "CIS_" + incScanPhasePrefix = 
"CSP_" +) + +type SystemCheckFlags uint32 + +func (f SystemCheckFlags) String() string { + var flags []string + if f&SystemCheckFlagDryRun != 0 { + flags = append(flags, "dry-run") + } + if f&SystemCheckFlagReset != 0 { + flags = append(flags, "reset") + } + if f&SystemCheckFlagFailout != 0 { + flags = append(flags, "failout") + } + if f&SystemCheckFlagAuto != 0 { + flags = append(flags, "auto") + } + if f&SystemCheckFlagFindOrphans != 0 { + flags = append(flags, "find-pool-orphans") + } + if f&SystemCheckFlagDisableFailout != 0 { + flags = append(flags, "reset-failout") + } + if f&SystemCheckFlagDisableAuto != 0 { + flags = append(flags, "reset-auto") + } + if len(flags) == 0 { + return "none" + } + return strings.Join(flags, ",") +} + +func (f SystemCheckFlags) MarshalJSON() ([]byte, error) { + return []byte(`"` + f.String() + `"`), nil +} + +type SystemCheckFindingClass chkpb.CheckInconsistClass + +func (c SystemCheckFindingClass) String() string { + return strings.TrimPrefix(chkpb.CheckInconsistClass(c).String(), incClassPrefix) +} + +func (c *SystemCheckFindingClass) FromString(in string) error { + if !strings.HasPrefix(in, incClassPrefix) { + in = incClassPrefix + in + } + if cls, ok := chkpb.CheckInconsistClass_value[in]; ok { + *c = SystemCheckFindingClass(cls) + return nil + } + return errors.Errorf("invalid inconsistency class %q", in) +} + +type SystemCheckRepairAction chkpb.CheckInconsistAction + +func (a SystemCheckRepairAction) String() string { + return strings.TrimPrefix(chkpb.CheckInconsistAction(a).String(), incActionPrefix) +} + +func (a *SystemCheckRepairAction) FromString(in string) error { + if !strings.HasPrefix(in, incActionPrefix) { + in = incActionPrefix + in + } + if act, ok := chkpb.CheckInconsistAction_value[in]; ok { + *a = SystemCheckRepairAction(act) + return nil + } + return errors.Errorf("invalid inconsistency action %q", in) +} + +type SystemCheckPolicy struct { + FindingClass SystemCheckFindingClass + RepairAction 
SystemCheckRepairAction +} + +func NewSystemCheckPolicy(cls, act string) (*SystemCheckPolicy, error) { + p := &SystemCheckPolicy{} + if err := p.FindingClass.FromString(cls); err != nil { + return nil, err + } + if err := p.RepairAction.FromString(act); err != nil { + return nil, err + } + return p, nil +} + +func (p *SystemCheckPolicy) String() string { + return p.FindingClass.String() + ":" + p.RepairAction.String() +} + +func (p *SystemCheckPolicy) MarshalJSON() ([]byte, error) { + return []byte(`"` + p.String() + `"`), nil +} + +func (p *SystemCheckPolicy) UnmarshalJSON(in []byte) error { + parts := strings.Split(strings.Trim(string(in), `"`), ":") + if len(parts) != 2 { + return errors.Errorf("invalid policy %q", in) + } + pol, err := NewSystemCheckPolicy(parts[0], parts[1]) + if err != nil { + return err + } + *p = *pol + + return nil +} + +func (p *SystemCheckPolicy) toPB() *mgmtpb.CheckInconsistPolicy { + return &mgmtpb.CheckInconsistPolicy{ + InconsistCas: chkpb.CheckInconsistClass(p.FindingClass), + InconsistAct: chkpb.CheckInconsistAction(p.RepairAction), + } +} + +func policyFromPB(pb *mgmtpb.CheckInconsistPolicy) *SystemCheckPolicy { + return &SystemCheckPolicy{ + FindingClass: SystemCheckFindingClass(pb.InconsistCas), + RepairAction: SystemCheckRepairAction(pb.InconsistAct), + } +} + +func CheckerPolicyClasses() []SystemCheckFindingClass { + classes := make([]SystemCheckFindingClass, 0, len(chkpb.CheckInconsistClass_value)) + for _, val := range chkpb.CheckInconsistClass_value { + classes = append(classes, SystemCheckFindingClass(val)) + } + return classes +} + +func CheckerPolicyActions() []SystemCheckRepairAction { + actions := make([]SystemCheckRepairAction, 0, len(chkpb.CheckInconsistAction_value)) + for _, val := range chkpb.CheckInconsistAction_value { + actions = append(actions, SystemCheckRepairAction(val)) + } + return actions +} + +type SystemCheckStartReq struct { + unaryRequest + msRequest + + Policies []*SystemCheckPolicy + 
mgmtpb.CheckStartReq +} + +func checkSetFlags(setFlags uint32, incompatFlags ...chkpb.CheckFlag) error { + strFlags := make([]string, 0, len(incompatFlags)) + for _, flag := range incompatFlags { + if setFlags&uint32(flag) != 0 { + strFlags = append(strFlags, flag.String()) + } + } + if len(strFlags) <= 1 { + return nil + } + + return errors.Errorf("flags %s are mutually exclusive", strings.Join(strFlags, ", ")) +} + +// SystemCheckStart starts the system checker. +func SystemCheckStart(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckStartReq) error { + if req == nil { + return errors.Errorf("nil %T", req) + } + if err := checkSetFlags(req.Flags, chkpb.CheckFlag_CF_FAILOUT, chkpb.CheckFlag_CF_NO_FAILOUT); err != nil { + return err + } + if err := checkSetFlags(req.Flags, chkpb.CheckFlag_CF_AUTO, chkpb.CheckFlag_CF_NO_AUTO); err != nil { + return err + } + + req.CheckStartReq.Sys = req.getSystem(rpcClient) + for _, p := range req.Policies { + req.CheckStartReq.Policies = append(req.CheckStartReq.Policies, p.toPB()) + } + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckStart(ctx, &req.CheckStartReq) + }) + + rpcClient.Debugf("DAOS system check start request: %s", pbutil.Debug(&req.CheckStartReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} + +type SystemCheckStopReq struct { + unaryRequest + msRequest + + mgmtpb.CheckStopReq +} + +// SystemCheckStop stops the system checker. 
+func SystemCheckStop(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckStopReq) error { + if req == nil { + return errors.Errorf("nil %T", req) + } + + req.CheckStopReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckStop(ctx, &req.CheckStopReq) + }) + + rpcClient.Debugf("DAOS system check stop request: %s", pbutil.Debug(&req.CheckStopReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} + +type SystemCheckQueryReq struct { + unaryRequest + msRequest + + mgmtpb.CheckQueryReq +} + +type SystemCheckStatus chkpb.CheckInstStatus + +func (s SystemCheckStatus) String() string { + return strings.TrimPrefix(chkpb.CheckInstStatus(s).String(), incStatusPrefix) +} + +func (p SystemCheckStatus) MarshalJSON() ([]byte, error) { + return []byte(`"` + p.String() + `"`), nil +} + +const ( + SystemCheckStatusInit = SystemCheckStatus(chkpb.CheckInstStatus_CIS_INIT) + SystemCheckStatusRunning = SystemCheckStatus(chkpb.CheckInstStatus_CIS_RUNNING) + SystemCheckStatusCompleted = SystemCheckStatus(chkpb.CheckInstStatus_CIS_COMPLETED) + SystemCheckStatusStopped = SystemCheckStatus(chkpb.CheckInstStatus_CIS_STOPPED) + SystemCheckStatusFailed = SystemCheckStatus(chkpb.CheckInstStatus_CIS_FAILED) + SystemCheckStatusPaused = SystemCheckStatus(chkpb.CheckInstStatus_CIS_PAUSED) + SystemCheckStatusImplicated = SystemCheckStatus(chkpb.CheckInstStatus_CIS_IMPLICATED) +) + +type SystemCheckScanPhase chkpb.CheckScanPhase + +func (p SystemCheckScanPhase) String() string { + return strings.TrimPrefix(chkpb.CheckScanPhase(p).String(), incScanPhasePrefix) +} + +func (p SystemCheckScanPhase) Description() string { + switch p { + case SystemCheckScanPhasePrepare: + return "Preparing check engine" + case SystemCheckScanPhasePoolList: + return "Comparing pool list on MS and storage nodes" + case 
SystemCheckScanPhasePoolMembership: + return "Comparing pool membership on MS and storage nodes" + case SystemCheckScanPhasePoolCleanup: + return "Cleaning up pool entries" + case SystemCheckScanPhaseContainerList: + return "Comparing container list on PS and storage nodes" + case SystemCheckScanPhaseContainerCleanup: + return "Cleaning up container entries" + case SystemCheckScanPhaseDtxResync: + return "DTX resync and cleanup" + case SystemCheckScanPhaseObjectScrub: + return "Scrubbing objects" + case SystemCheckScanPhaseObjectRebuild: + return "Rebuilding objects" + case SystemCheckScanPhaseAggregation: + return "EC and VOS aggregation" + case SystemCheckScanPhaseDone: + return "Check completed" + default: + return fmt.Sprintf("Unknown (%s)", p) + } +} + +func (p SystemCheckScanPhase) MarshalJSON() ([]byte, error) { + return []byte(`"` + p.String() + `"`), nil +} + +const ( + SystemCheckScanPhasePrepare = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_PREPARE) + SystemCheckScanPhasePoolList = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_POOL_LIST) + SystemCheckScanPhasePoolMembership = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_POOL_MBS) + SystemCheckScanPhasePoolCleanup = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_POOL_CLEANUP) + SystemCheckScanPhaseContainerList = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_CONT_LIST) + SystemCheckScanPhaseContainerCleanup = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_CONT_CLEANUP) + SystemCheckScanPhaseDtxResync = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_DTX_RESYNC) + SystemCheckScanPhaseObjectScrub = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_OBJ_SCRUB) + SystemCheckScanPhaseObjectRebuild = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_REBUILD) + SystemCheckScanPhaseAggregation = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_AGGREGATION) + SystemCheckScanPhaseDone = SystemCheckScanPhase(chkpb.CheckScanPhase_CSP_DONE) +) + +// SystemCheckRepairChoice describes a possible means to repair a checker error. 
+type SystemCheckRepairChoice struct { + Action SystemCheckRepairAction + Info string +} + +// SystemCheckReport contains the results of a system check. +type SystemCheckReport struct { + chkpb.CheckReport +} + +// RepairChoices lists all possible repair options for this particular report. +func (r *SystemCheckReport) RepairChoices() []*SystemCheckRepairChoice { + if r == nil { + return nil + } + + choices := make([]*SystemCheckRepairChoice, len(r.ActChoices)) + for i, c := range r.ActChoices { + info := r.ActMsgs[i] + // FIXME DAOS-12189: Use the details instead because the messages + // are too generic. Longer-term, only the messages should be + // user-visible. + if len(strings.Fields(r.ActDetails[i])) > 1 { + info = r.ActDetails[i] + } + choices[i] = &SystemCheckRepairChoice{ + Action: SystemCheckRepairAction(c), + Info: info, + } + } + + return choices +} + +// IsInteractive indicates whether this report requires user interaction to make a repair choice. +func (r *SystemCheckReport) IsInteractive() bool { + return r.Action == chkpb.CheckInconsistAction_CIA_INTERACT +} + +// IsRemovedPool indicates whether the error detected in this report indicates a missing pool. +func (r *SystemCheckReport) IsRemovedPool() bool { + return r.Action == chkpb.CheckInconsistAction_CIA_DISCARD && + (r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE || + r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS) +} + +// Resolution returns a string describing the action taken to resolve this report. 
+func (r *SystemCheckReport) Resolution() string { + msg := SystemCheckRepairAction(r.Action).String() + if len(r.ActMsgs) == 1 { + msg += ": " + r.ActMsgs[0] + } + return msg +} + +type rawRankMap map[ranklist.Rank]*mgmtpb.CheckQueryPool + +type SystemCheckPoolInfo struct { + RawRankInfo rawRankMap `json:"-"` + UUID string `json:"uuid"` + Label string `json:"label"` + Status string `json:"status"` + Phase string `json:"phase"` + StartTime time.Time `json:"-"` + Remaining time.Duration `json:"-"` + Elapsed time.Duration `json:"-"` +} + +func (p *SystemCheckPoolInfo) MarshalJSON() ([]byte, error) { + type toJSON SystemCheckPoolInfo + return json.Marshal(&struct { + *toJSON + RankCount int `json:"rank_count"` + StartTime string `json:"start_time"` + Remaining float64 `json:"remaining"` + Elapsed float64 `json:"elapsed"` + }{ + toJSON: (*toJSON)(p), + RankCount: len(p.RawRankInfo), + StartTime: common.FormatTime(p.StartTime), + Remaining: p.Remaining.Seconds(), + Elapsed: p.Elapsed.Seconds(), + }) +} + +func (p *SystemCheckPoolInfo) String() string { + var remOrElapsed string + if p.Elapsed > 0 { + remOrElapsed = fmt.Sprintf(" elapsed: %s", p.Elapsed) + } else if p.Remaining > 0 { + remOrElapsed = fmt.Sprintf(" remaining: %s", p.Remaining) + } + timeStr := "" + if !p.StartTime.IsZero() { + timeStr = fmt.Sprintf(", started: %s%s", common.FormatTime(p.StartTime), remOrElapsed) + } + return fmt.Sprintf("Pool %s: %d ranks, status: %s, phase: %s%s", + p.UUID, len(p.RawRankInfo), p.Status, p.Phase, timeStr) +} + +func (p *SystemCheckPoolInfo) Unchecked() bool { + return p.Status == chkpb.CheckPoolStatus_CPS_UNCHECKED.String() +} + +func getQueryPoolRank(pool *mgmtpb.CheckQueryPool) ranklist.Rank { + if len(pool.Targets) == 0 { + return ranklist.NilRank + } + return ranklist.Rank(pool.Targets[0].Rank) +} + +func roe(f string, status chkpb.CheckPoolStatus, val uint64) time.Duration { + if f == "r" && status != chkpb.CheckPoolStatus_CPS_CHECKING { + return 0 + } + if f == "e" 
&& status == chkpb.CheckPoolStatus_CPS_CHECKING { + return 0 + } + return time.Duration(val) * time.Second +} + +func getPoolCheckInfo(pbPools []*mgmtpb.CheckQueryPool) map[string]*SystemCheckPoolInfo { + pools := make(map[string]*SystemCheckPoolInfo) + + for _, pbPool := range pbPools { + if _, found := pools[pbPool.Uuid]; !found { + pools[pbPool.Uuid] = &SystemCheckPoolInfo{ + RawRankInfo: make(rawRankMap), + UUID: pbPool.Uuid, + // For the moment, ignore potential differences in these details + // across multiple ranks. + Status: pbPool.Status.String(), + Phase: pbPool.Phase.String(), + StartTime: time.Unix(int64(pbPool.Time.StartTime), 0), + Remaining: roe("r", pbPool.Status, pbPool.Time.MiscTime), + Elapsed: roe("e", pbPool.Status, pbPool.Time.MiscTime), + } + } + pools[pbPool.Uuid].RawRankInfo[getQueryPoolRank(pbPool)] = pbPool + } + + return pools +} + +type SystemCheckQueryResp struct { + Status SystemCheckStatus `json:"status"` + ScanPhase SystemCheckScanPhase `json:"scan_phase"` + StartTime time.Time `json:"start_time"` + + Pools map[string]*SystemCheckPoolInfo `json:"pools"` + Reports []*SystemCheckReport `json:"reports"` +} + +func (r *SystemCheckQueryResp) MarshalJSON() ([]byte, error) { + type toJSON SystemCheckQueryResp + return json.Marshal(struct { + StartTime string `json:"start_time"` + *toJSON + }{ + StartTime: common.FormatTime(r.StartTime), + toJSON: (*toJSON)(r), + }) +} + +// SystemCheckQuery queries the system checker status. 
+func SystemCheckQuery(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckQueryReq) (*SystemCheckQueryResp, error) { + if req == nil { + return nil, errors.Errorf("nil %T", req) + } + + req.CheckQueryReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckQuery(ctx, &req.CheckQueryReq) + }) + + rpcClient.Debugf("DAOS system check query request: %s", pbutil.Debug(&req.CheckQueryReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return nil, err + } + pbResp := new(mgmtpb.CheckQueryResp) + if err := convertMSResponse(ur, pbResp); err != nil { + return nil, err + } + + resp := &SystemCheckQueryResp{ + Status: SystemCheckStatus(pbResp.GetInsStatus()), + ScanPhase: SystemCheckScanPhase(pbResp.GetInsPhase()), + StartTime: time.Unix(int64(pbResp.GetTime().GetStartTime()), 0), + Pools: getPoolCheckInfo(pbResp.GetPools()), + } + for _, pbReport := range pbResp.GetReports() { + rpt := new(SystemCheckReport) + proto.Merge(rpt, pbReport) + resp.Reports = append(resp.Reports, rpt) + } + return resp, nil +} + +type SystemCheckGetPolicyReq struct { + unaryRequest + msRequest + + mgmtpb.CheckGetPolicyReq +} + +func (r *SystemCheckGetPolicyReq) SetClasses(classes []SystemCheckFindingClass) { + for _, cls := range classes { + r.CheckGetPolicyReq.Classes = append(r.CheckGetPolicyReq.Classes, chkpb.CheckInconsistClass(cls)) + } +} + +type SystemCheckGetPolicyResp struct { + CheckerFlags SystemCheckFlags `json:"checker_flags"` + Policies []*SystemCheckPolicy `json:"policies"` +} + +// SystemCheckGetPolicy queries the system checker properties. 
+func SystemCheckGetPolicy(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckGetPolicyReq) (*SystemCheckGetPolicyResp, error) { + if req == nil { + return nil, errors.Errorf("nil %T", req) + } + + req.CheckGetPolicyReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckGetPolicy(ctx, &req.CheckGetPolicyReq) + }) + + rpcClient.Debugf("DAOS system check get-policy request: %s", pbutil.Debug(&req.CheckGetPolicyReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return nil, err + } + ms, err := ur.getMSResponse() + if err != nil { + return nil, err + } + + resp := new(SystemCheckGetPolicyResp) + if pbResp, ok := ms.(*mgmtpb.CheckGetPolicyResp); ok { + resp.CheckerFlags = SystemCheckFlags(pbResp.Flags) + for _, p := range pbResp.Policies { + resp.Policies = append(resp.Policies, policyFromPB(p)) + } + } else { + return nil, errors.Errorf("unexpected response type %T", ms) + } + return resp, nil +} + +type SystemCheckSetPolicyReq struct { + unaryRequest + msRequest + + ResetToDefaults bool + AllInteractive bool + Policies []*SystemCheckPolicy + mgmtpb.CheckSetPolicyReq +} + +// SystemCheckSetPolicy sets the system checker properties. 
+func SystemCheckSetPolicy(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckSetPolicyReq) error { + switch { + case req == nil: + return errors.Errorf("nil %T", req) + case len(req.Policies) == 0 && !(req.AllInteractive || req.ResetToDefaults): + return errors.New("no policies specified") + case len(req.Policies) > 0 && (req.AllInteractive || req.ResetToDefaults): + return errors.New("cannot specify policy list and AllInteractive or ResetToDefaults") + } + + req.CheckSetPolicyReq.Sys = req.getSystem(rpcClient) + if req.AllInteractive || req.ResetToDefaults { + action := chkpb.CheckInconsistAction_CIA_INTERACT + if req.ResetToDefaults { + action = chkpb.CheckInconsistAction_CIA_DEFAULT + } + for _, cls := range CheckerPolicyClasses() { + req.CheckSetPolicyReq.Policies = append(req.CheckSetPolicyReq.Policies, &mgmtpb.CheckInconsistPolicy{ + InconsistCas: chkpb.CheckInconsistClass(cls), + InconsistAct: action, + }) + } + } else { + for _, p := range req.Policies { + req.CheckSetPolicyReq.Policies = append(req.CheckSetPolicyReq.Policies, p.toPB()) + } + } + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckSetPolicy(ctx, &req.CheckSetPolicyReq) + }) + + rpcClient.Debugf("DAOS system check set-policy request: %s", pbutil.Debug(&req.CheckSetPolicyReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} + +type SystemCheckRepairReq struct { + unaryRequest + msRequest + + mgmtpb.CheckActReq +} + +func (r *SystemCheckRepairReq) SetAction(action int32) error { + if _, ok := chkpb.CheckInconsistAction_name[action]; !ok { + return errors.Errorf("invalid action %d", action) + } + r.Act = chkpb.CheckInconsistAction(action) + return nil +} + +// SystemCheckRepair sends a request to the system checker to indicate +// what the desired repair action is for a reported inconsistency. 
+func SystemCheckRepair(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckRepairReq) error { + if req == nil { + return errors.Errorf("nil %T", req) + } + + req.CheckActReq.Sys = req.getSystem(rpcClient) + req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { + return mgmtpb.NewMgmtSvcClient(conn).SystemCheckRepair(ctx, &req.CheckActReq) + }) + + rpcClient.Debugf("DAOS system check repair request: %s", pbutil.Debug(&req.CheckActReq)) + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return err + } + + return ur.getMSError() +} diff --git a/src/control/lib/control/check_test.go b/src/control/lib/control/check_test.go new file mode 100644 index 00000000000..a5deea80211 --- /dev/null +++ b/src/control/lib/control/check_test.go @@ -0,0 +1,103 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package control + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" +) + +func TestControl_SystemCheckReport_RepairChoices(t *testing.T) { + for name, tc := range map[string]struct { + report *SystemCheckReport + expChoices []*SystemCheckRepairChoice + }{ + "nil": {}, + "no choices": { + report: &SystemCheckReport{}, + expChoices: []*SystemCheckRepairChoice{}, + }, + "no details": { + report: &SystemCheckReport{ + CheckReport: chkpb.CheckReport{ + ActChoices: []chkpb.CheckInconsistAction{chkpb.CheckInconsistAction_CIA_TRUST_MS}, + ActMsgs: []string{"action message"}, + ActDetails: []string{""}, + }, + }, + expChoices: []*SystemCheckRepairChoice{ + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_MS), + Info: "action message", + }, + }, + }, + "has details": { + report: &SystemCheckReport{ + CheckReport: chkpb.CheckReport{ + ActChoices: []chkpb.CheckInconsistAction{chkpb.CheckInconsistAction_CIA_TRUST_MS}, + ActMsgs: []string{"action message"}, + ActDetails: 
[]string{"action details"}, + }, + }, + expChoices: []*SystemCheckRepairChoice{ + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_MS), + Info: "action details", + }, + }, + }, + "same order": { + report: &SystemCheckReport{ + CheckReport: chkpb.CheckReport{ + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_OLDEST, + chkpb.CheckInconsistAction_CIA_TRUST_LATEST, + }, + ActMsgs: []string{ + "trust PS", + "trust MS", + "trust oldest", + "trust latest", + }, + ActDetails: []string{"", "", "", ""}, + }, + }, + expChoices: []*SystemCheckRepairChoice{ + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_PS), + Info: "trust PS", + }, + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_MS), + Info: "trust MS", + }, + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_OLDEST), + Info: "trust oldest", + }, + { + Action: SystemCheckRepairAction(chkpb.CheckInconsistAction_CIA_TRUST_LATEST), + Info: "trust latest", + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + result := tc.report.RepairChoices() + + if diff := cmp.Diff(tc.expChoices, result); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + }) + } +} diff --git a/src/control/lib/control/fi.go b/src/control/lib/control/fi.go new file mode 100644 index 00000000000..585b3f33a7b --- /dev/null +++ b/src/control/lib/control/fi.go @@ -0,0 +1,35 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build fault_injection +// +build fault_injection + +package control + +import ( + "context" + + "google.golang.org/protobuf/proto" +) + +type faultInjectionReq struct { + msRequest + unaryRequest +} + +// InvokeFaultRPC is meant to be used during fault injection tests. 
It +// provides a bare-bones RPC client that can be used to invoke any RPC +// directly without translation between protobuf messages and native types. +func InvokeFaultRPC(ctx context.Context, rpcClient UnaryInvoker, rpc unaryRPC) (proto.Message, error) { + req := new(faultInjectionReq) + req.setRPC(rpc) + + ur, err := rpcClient.InvokeUnaryRPC(ctx, req) + if err != nil { + return nil, err + } + + return ur.getMSResponse() +} diff --git a/src/control/lib/control/system.go b/src/control/lib/control/system.go index 3e714cc8946..73afa873c9f 100644 --- a/src/control/lib/control/system.go +++ b/src/control/lib/control/system.go @@ -119,6 +119,7 @@ type SystemJoinReq struct { FaultDomain *system.FaultDomain `json:"srv_fault_domain"` InstanceIdx uint32 `json:"idx"` Incarnation uint64 `json:"incarnation"` + CheckMode bool `json:"check_mode"` } // MarshalJSON packs SystemJoinResp struct into a JSON message. @@ -145,6 +146,30 @@ type SystemJoinResp struct { MapVersion uint32 `json:"map_version"` } +func (resp *SystemJoinResp) UnmarshalJSON(data []byte) error { + type fromJSON SystemJoinResp + aux := &struct { + State mgmtpb.JoinResp_State `json:"state"` + *fromJSON + }{ + fromJSON: (*fromJSON)(resp), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + switch aux.State { + case mgmtpb.JoinResp_IN: + resp.State = system.MemberStateJoined + case mgmtpb.JoinResp_OUT: + resp.State = system.MemberStateExcluded + case mgmtpb.JoinResp_CHECK: + resp.State = system.MemberStateCheckerStarted + } + + return nil +} + // SystemJoin will attempt to join a new member to the DAOS system. 
func SystemJoin(ctx context.Context, rpcClient UnaryInvoker, req *SystemJoinReq) (*SystemJoinResp, error) { pbReq := new(mgmtpb.JoinReq) @@ -356,11 +381,11 @@ func SystemStart(ctx context.Context, rpcClient UnaryInvoker, req *SystemStartRe return nil, errors.Errorf("nil %T request", req) } - pbReq := new(mgmtpb.SystemStartReq) - pbReq.Hosts = req.Hosts.String() - pbReq.Ranks = req.Ranks.String() - pbReq.Sys = req.getSystem(rpcClient) - + pbReq := &mgmtpb.SystemStartReq{ + Hosts: req.Hosts.String(), + Ranks: req.Ranks.String(), + Sys: req.getSystem(rpcClient), + } req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { return mgmtpb.NewMgmtSvcClient(conn).SystemStart(ctx, pbReq) }) @@ -690,8 +715,9 @@ func LeaderQuery(ctx context.Context, rpcClient UnaryInvoker, req *LeaderQueryRe type RanksReq struct { unaryRequest respReportCb HostResponseReportFn - Ranks string - Force bool + Ranks string `json:"ranks"` + Force bool `json:"force"` + CheckMode bool `json:"check_mode"` } func (r *RanksReq) reportResponse(resp *HostResponse) { diff --git a/src/control/lib/control/system_test.go b/src/control/lib/control/system_test.go index 9a0a8222148..3a85eaa1d1a 100644 --- a/src/control/lib/control/system_test.go +++ b/src/control/lib/control/system_test.go @@ -1368,7 +1368,7 @@ func TestControl_SystemJoin_RetryableErrors(t *testing.T) { t.Fatalf("unexpected error: %v", gotErr) } - expResp := &SystemJoinResp{Rank: 42} + expResp := &SystemJoinResp{Rank: 42, State: system.MemberStateJoined} if diff := cmp.Diff(expResp, gotResp, defResCmpOpts()...); diff != "" { t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) } @@ -1422,7 +1422,7 @@ func TestControl_SystemJoin_Timeouts(t *testing.T) { }, }, }, - expResp: &SystemJoinResp{Rank: 42}, + expResp: &SystemJoinResp{Rank: 42, State: system.MemberStateJoined}, }, "MS response contains timeout; request is retried": { mic: &MockInvokerConfig{ @@ -1447,7 +1447,7 @@ func TestControl_SystemJoin_Timeouts(t 
*testing.T) { MockMSResponse("", nil, &mgmtpb.JoinResp{Rank: 42}), }, }, - expResp: &SystemJoinResp{Rank: 42}, + expResp: &SystemJoinResp{Rank: 42, State: system.MemberStateJoined}, }, } { t.Run(name, func(t *testing.T) { diff --git a/src/control/lib/ui/bool_flags.go b/src/control/lib/ui/bool_flags.go new file mode 100644 index 00000000000..0b6447890aa --- /dev/null +++ b/src/control/lib/ui/bool_flags.go @@ -0,0 +1,35 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package ui + +import ( + "strings" + + "github.com/pkg/errors" +) + +// EnabledFlag allows a flag to be optionally set to a boolean value. +type EnabledFlag struct { + Set bool + Enabled bool +} + +// UnmarshalFlag implements the flags.Unmarshaler interface. +func (f *EnabledFlag) UnmarshalFlag(fv string) error { + f.Set = true + + switch strings.ToLower(fv) { + case "true", "1", "yes", "on": + f.Enabled = true + case "false", "0", "no", "off": + f.Enabled = false + default: + return errors.Errorf("invalid boolean value %q", fv) + } + + return nil +} diff --git a/src/control/lib/ui/bool_flags_test.go b/src/control/lib/ui/bool_flags_test.go new file mode 100644 index 00000000000..805b8ad8d00 --- /dev/null +++ b/src/control/lib/ui/bool_flags_test.go @@ -0,0 +1,40 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package ui + +import ( + "testing" + + "github.com/daos-stack/daos/src/control/common/test" +) + +func TestUI_EnabledFlag(t *testing.T) { + trueVals := []string{"true", "1", "yes", "on"} + falseVals := []string{"false", "0", "no", "off"} + + for _, val := range trueVals { + testFlag := EnabledFlag{} + t.Run(val, func(t *testing.T) { + test.CmpErr(t, nil, testFlag.UnmarshalFlag(val)) + }) + test.AssertTrue(t, testFlag.Set, "not set") + test.AssertTrue(t, testFlag.Enabled, "not enabled") + } + for _, val := range falseVals { + testFlag := EnabledFlag{} + t.Run(val, func(t *testing.T) { + test.CmpErr(t, nil, testFlag.UnmarshalFlag(val)) + }) + test.AssertTrue(t, testFlag.Set, "not set") + test.AssertFalse(t, testFlag.Enabled, "enabled") + } + + testFlag := EnabledFlag{} + if err := testFlag.UnmarshalFlag("invalid"); err == nil { + t.Fatal("expected error") + } +} diff --git a/src/control/lib/ui/list_flags_test.go b/src/control/lib/ui/list_flags_test.go index d2ae6ce2aeb..8fd47af4aa2 100644 --- a/src/control/lib/ui/list_flags_test.go +++ b/src/control/lib/ui/list_flags_test.go @@ -169,7 +169,7 @@ func TestUI_MemberStateSetFlag(t *testing.T) { }, }, "full list": { - arg: "Joined,Excluded,Stopped,Stopping,Ready,Starting,AwaitFormat,AdminExcluded,Errored,Unresponsive", + arg: "Joined,Excluded,Stopped,Stopping,Ready,Starting,AwaitFormat,AdminExcluded,Errored,Unresponsive,CheckerStarted", expFlag: &ui.MemberStateSetFlag{ States: system.MemberState(int(system.MemberStateMax) - 1), }, @@ -205,7 +205,7 @@ func TestUI_MemberStateSetFlag_Complete(t *testing.T) { }{ "empty string; suggest all": { expComplStrs: []string{ - "AdminExcluded", "AwaitFormat", "Errored", "Excluded", + "AdminExcluded", "AwaitFormat", "CheckerStarted", "Errored", "Excluded", "Joined", "Ready", "Starting", "Stopped", "Stopping", "Unresponsive", }, @@ -226,6 +226,7 @@ func TestUI_MemberStateSetFlag_Complete(t *testing.T) { arg: "Starting,", 
expComplStrs: []string{ "Starting,AdminExcluded", "Starting,AwaitFormat", + "Starting,CheckerStarted", "Starting,Errored", "Starting,Excluded", "Starting,Joined", "Starting,Ready", "Starting,Stopped", "Starting,Stopping", "Starting,Unresponsive", diff --git a/src/control/run_go_tests.sh b/src/control/run_go_tests.sh index 1e77d0f9f5f..ec8204dd308 100755 --- a/src/control/run_go_tests.sh +++ b/src/control/run_go_tests.sh @@ -99,16 +99,19 @@ function setup_environment() # allow cgo to find and link to third-party libs LD_LIBRARY_PATH=${SL_PREFIX+${SL_PREFIX}/lib} LD_LIBRARY_PATH+="${SL_PREFIX+:${SL_PREFIX}/lib64}" + LD_LIBRARY_PATH+="${SL_PREFIX+:${SL_PREFIX}/lib64/daos_srv}" LD_LIBRARY_PATH+="${SL_SPDK_PREFIX+:${SL_SPDK_PREFIX}/lib}" LD_LIBRARY_PATH+="${SL_OFI_PREFIX+:${SL_OFI_PREFIX}/lib}" CGO_LDFLAGS=${SL_PREFIX+-L${SL_PREFIX}/lib} CGO_LDFLAGS+="${SL_PREFIX+ -L${SL_PREFIX}/lib64}" + CGO_LDFLAGS+="${SL_PREFIX+ -L${SL_PREFIX}/lib64/daos_srv}" CGO_LDFLAGS+="${SL_BUILD_DIR+ -L${SL_BUILD_DIR}/src/control/lib/spdk}" CGO_LDFLAGS+="${SL_SPDK_PREFIX+ -L${SL_SPDK_PREFIX}/lib}" CGO_LDFLAGS+="${SL_OFI_PREFIX+ -L${SL_OFI_PREFIX}/lib}" CGO_CFLAGS=${SL_PREFIX+-I${SL_PREFIX}/include} CGO_CFLAGS+="${SL_SPDK_PREFIX+ -I${SL_SPDK_PREFIX}/include}" CGO_CFLAGS+="${SL_OFI_PREFIX+ -I${SL_OFI_PREFIX}/include}" + CGO_CFLAGS+="${SL_ARGOBOTS_PREFIX+ -I${SL_ARGOBOTS_PREFIX}/include}" src_include="$(dirname "$build_source")/src/include" if [ -d "$src_include" ]; then @@ -144,6 +147,7 @@ function check_formatting() { srcdir=${1:-"./"} output=$(find "$srcdir/" -name '*.go' -and -not -path '*vendor*' \ + -and -not -name '*.pb.go' \ -print0 | xargs -0 gofmt -d) if [ -n "$output" ]; then errmsg="ERROR: Your code hasn't been run through gofmt! @@ -163,7 +167,7 @@ $output function get_test_runner() { - test_args="-mod vendor -race -cover -v ./... -tags firmware" + test_args="-mod vendor -race -cover -v ./... 
-tags firmware,fault_injection" test_runner="go test" if which gotestsum >/dev/null; then diff --git a/src/control/security/grpc_authorization.go b/src/control/security/grpc_authorization.go index fd21e40298c..2fc913a20e7 100644 --- a/src/control/security/grpc_authorization.go +++ b/src/control/security/grpc_authorization.go @@ -28,59 +28,70 @@ func (c Component) String() string { // methodAuthorizations is the map for checking which components are authorized to make the specific method call. var methodAuthorizations = map[string][]Component{ - "/ctl.CtlSvc/StorageScan": {ComponentAdmin}, - "/ctl.CtlSvc/StorageFormat": {ComponentAdmin}, - "/ctl.CtlSvc/StorageNvmeRebind": {ComponentAdmin}, - "/ctl.CtlSvc/StorageNvmeAddDevice": {ComponentAdmin}, - "/ctl.CtlSvc/NetworkScan": {ComponentAdmin}, - "/ctl.CtlSvc/CollectLog": {ComponentAdmin}, - "/ctl.CtlSvc/FirmwareQuery": {ComponentAdmin}, - "/ctl.CtlSvc/FirmwareUpdate": {ComponentAdmin}, - "/ctl.CtlSvc/SmdQuery": {ComponentAdmin}, - "/ctl.CtlSvc/SmdManage": {ComponentAdmin}, - "/ctl.CtlSvc/SetEngineLogMasks": {ComponentAdmin}, - "/ctl.CtlSvc/PrepShutdownRanks": {ComponentServer}, - "/ctl.CtlSvc/StopRanks": {ComponentServer}, - "/ctl.CtlSvc/ResetFormatRanks": {ComponentServer}, - "/ctl.CtlSvc/StartRanks": {ComponentServer}, - "/mgmt.MgmtSvc/Join": {ComponentServer}, - "/mgmt.MgmtSvc/ClusterEvent": {ComponentServer}, - "/mgmt.MgmtSvc/LeaderQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemErase": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemStart": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemStop": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolCreate": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDestroy": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolQueryTarget": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolSetProp": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolGetProp": {ComponentAdmin}, - 
"/mgmt.MgmtSvc/PoolGetACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolOverwriteACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, - "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, - "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, - "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, - "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, - "/mgmt.MgmtSvc/ContSetOwner": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemCleanup": {ComponentAdmin, ComponentAgent}, - "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemSetAttr": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemGetAttr": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemSetProp": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemGetProp": {ComponentAdmin}, - "/RaftTransport/AppendEntries": {ComponentServer}, - "/RaftTransport/AppendEntriesPipeline": {ComponentServer}, - "/RaftTransport/RequestVote": {ComponentServer}, - "/RaftTransport/TimeoutNow": {ComponentServer}, - "/RaftTransport/InstallSnapshot": {ComponentServer}, + "/ctl.CtlSvc/StorageScan": {ComponentAdmin}, + "/ctl.CtlSvc/StorageFormat": {ComponentAdmin}, + "/ctl.CtlSvc/StorageNvmeRebind": {ComponentAdmin}, + "/ctl.CtlSvc/StorageNvmeAddDevice": {ComponentAdmin}, + "/ctl.CtlSvc/NetworkScan": {ComponentAdmin}, + "/ctl.CtlSvc/CollectLog": {ComponentAdmin}, + "/ctl.CtlSvc/FirmwareQuery": {ComponentAdmin}, + "/ctl.CtlSvc/FirmwareUpdate": {ComponentAdmin}, + "/ctl.CtlSvc/SmdQuery": {ComponentAdmin}, + "/ctl.CtlSvc/SmdManage": {ComponentAdmin}, + "/ctl.CtlSvc/SetEngineLogMasks": {ComponentAdmin}, + "/ctl.CtlSvc/PrepShutdownRanks": {ComponentServer}, + "/ctl.CtlSvc/StopRanks": {ComponentServer}, + "/ctl.CtlSvc/ResetFormatRanks": {ComponentServer}, + "/ctl.CtlSvc/StartRanks": {ComponentServer}, + 
"/mgmt.MgmtSvc/Join": {ComponentServer}, + "/mgmt.MgmtSvc/ClusterEvent": {ComponentServer}, + "/mgmt.MgmtSvc/LeaderQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemErase": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemStart": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemStop": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemExclude": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolCreate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDestroy": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolQueryTarget": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolSetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolGetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolGetACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolOverwriteACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, + "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, + "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, + "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, + "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, + "/mgmt.MgmtSvc/ContSetOwner": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCleanup": {ComponentAdmin, ComponentAgent}, + "/mgmt.MgmtSvc/SystemCheckEnable": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckDisable": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckStart": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckStop": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckSetPolicy": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckGetPolicy": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckRepair": {ComponentAdmin}, + "/mgmt.MgmtSvc/FaultInjectReport": {ComponentAdmin}, + "/mgmt.MgmtSvc/FaultInjectPoolFault": {ComponentAdmin}, + 
"/mgmt.MgmtSvc/FaultInjectMgmtPoolFault": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemSetAttr": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemGetAttr": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemSetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemGetProp": {ComponentAdmin}, + "/RaftTransport/AppendEntries": {ComponentServer}, + "/RaftTransport/AppendEntriesPipeline": {ComponentServer}, + "/RaftTransport/RequestVote": {ComponentServer}, + "/RaftTransport/TimeoutNow": {ComponentServer}, + "/RaftTransport/InstallSnapshot": {ComponentServer}, } func methodToComponent(method string, methodAuthorizations map[string][]Component) (build.Component, error) { diff --git a/src/control/security/grpc_authorization_test.go b/src/control/security/grpc_authorization_test.go index 89bc1b53b2b..d054c54ed08 100644 --- a/src/control/security/grpc_authorization_test.go +++ b/src/control/security/grpc_authorization_test.go @@ -53,59 +53,70 @@ func inList(c Component, compList []Component) bool { func TestSecurity_ComponentHasAccess(t *testing.T) { allComponents := []Component{ComponentUndefined, ComponentAdmin, ComponentAgent, ComponentServer} testCases := map[string][]Component{ - "/ctl.CtlSvc/StorageScan": {ComponentAdmin}, - "/ctl.CtlSvc/StorageFormat": {ComponentAdmin}, - "/ctl.CtlSvc/StorageNvmeRebind": {ComponentAdmin}, - "/ctl.CtlSvc/StorageNvmeAddDevice": {ComponentAdmin}, - "/ctl.CtlSvc/NetworkScan": {ComponentAdmin}, - "/ctl.CtlSvc/CollectLog": {ComponentAdmin}, - "/ctl.CtlSvc/FirmwareQuery": {ComponentAdmin}, - "/ctl.CtlSvc/FirmwareUpdate": {ComponentAdmin}, - "/ctl.CtlSvc/SmdQuery": {ComponentAdmin}, - "/ctl.CtlSvc/SmdManage": {ComponentAdmin}, - "/ctl.CtlSvc/SetEngineLogMasks": {ComponentAdmin}, - "/ctl.CtlSvc/PrepShutdownRanks": {ComponentServer}, - "/ctl.CtlSvc/StopRanks": {ComponentServer}, - "/ctl.CtlSvc/ResetFormatRanks": {ComponentServer}, - "/ctl.CtlSvc/StartRanks": {ComponentServer}, - "/mgmt.MgmtSvc/Join": {ComponentServer}, - 
"/mgmt.MgmtSvc/ClusterEvent": {ComponentServer}, - "/mgmt.MgmtSvc/LeaderQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemStop": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemErase": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemStart": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolCreate": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDestroy": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolQuery": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolQueryTarget": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolSetProp": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolGetProp": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolGetACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolOverwriteACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, - "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, - "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, - "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, - "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, - "/mgmt.MgmtSvc/ContSetOwner": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemCleanup": {ComponentAdmin, ComponentAgent}, - "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemSetAttr": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemGetAttr": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemSetProp": {ComponentAdmin}, - "/mgmt.MgmtSvc/SystemGetProp": {ComponentAdmin}, - "/RaftTransport/AppendEntries": {ComponentServer}, - "/RaftTransport/AppendEntriesPipeline": {ComponentServer}, - "/RaftTransport/RequestVote": {ComponentServer}, - "/RaftTransport/TimeoutNow": {ComponentServer}, - "/RaftTransport/InstallSnapshot": {ComponentServer}, + "/ctl.CtlSvc/StorageScan": {ComponentAdmin}, + "/ctl.CtlSvc/StorageFormat": {ComponentAdmin}, + 
"/ctl.CtlSvc/StorageNvmeRebind": {ComponentAdmin}, + "/ctl.CtlSvc/StorageNvmeAddDevice": {ComponentAdmin}, + "/ctl.CtlSvc/NetworkScan": {ComponentAdmin}, + "/ctl.CtlSvc/CollectLog": {ComponentAdmin}, + "/ctl.CtlSvc/FirmwareQuery": {ComponentAdmin}, + "/ctl.CtlSvc/FirmwareUpdate": {ComponentAdmin}, + "/ctl.CtlSvc/SmdQuery": {ComponentAdmin}, + "/ctl.CtlSvc/SmdManage": {ComponentAdmin}, + "/ctl.CtlSvc/SetEngineLogMasks": {ComponentAdmin}, + "/ctl.CtlSvc/PrepShutdownRanks": {ComponentServer}, + "/ctl.CtlSvc/StopRanks": {ComponentServer}, + "/ctl.CtlSvc/ResetFormatRanks": {ComponentServer}, + "/ctl.CtlSvc/StartRanks": {ComponentServer}, + "/mgmt.MgmtSvc/Join": {ComponentServer}, + "/mgmt.MgmtSvc/ClusterEvent": {ComponentServer}, + "/mgmt.MgmtSvc/LeaderQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemStop": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemErase": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemStart": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemExclude": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolCreate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDestroy": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolQueryTarget": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolSetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolGetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolGetACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolOverwriteACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, + "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, + "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, + "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, + "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, + "/mgmt.MgmtSvc/ContSetOwner": {ComponentAdmin}, + 
"/mgmt.MgmtSvc/SystemCleanup": {ComponentAdmin, ComponentAgent}, + "/mgmt.MgmtSvc/SystemCheckEnable": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckDisable": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckStart": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckStop": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckQuery": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckSetPolicy": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckGetPolicy": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemCheckRepair": {ComponentAdmin}, + "/mgmt.MgmtSvc/FaultInjectReport": {ComponentAdmin}, + "/mgmt.MgmtSvc/FaultInjectPoolFault": {ComponentAdmin}, + "/mgmt.MgmtSvc/FaultInjectMgmtPoolFault": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemSetAttr": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemGetAttr": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemSetProp": {ComponentAdmin}, + "/mgmt.MgmtSvc/SystemGetProp": {ComponentAdmin}, + "/RaftTransport/AppendEntries": {ComponentServer}, + "/RaftTransport/AppendEntriesPipeline": {ComponentServer}, + "/RaftTransport/RequestVote": {ComponentServer}, + "/RaftTransport/TimeoutNow": {ComponentServer}, + "/RaftTransport/InstallSnapshot": {ComponentServer}, } var missing []string diff --git a/src/control/server/ctl_ranks_rpc.go b/src/control/server/ctl_ranks_rpc.go index c3257f1e941..7006eec4647 100644 --- a/src/control/server/ctl_ranks_rpc.go +++ b/src/control/server/ctl_ranks_rpc.go @@ -293,6 +293,8 @@ func (svc *ControlService) StartRanks(ctx context.Context, req *ctlpb.RanksReq) return nil, err } for _, ei := range instances { + ei.SetCheckerMode(req.CheckMode) + if ei.IsStarted() { continue } diff --git a/src/control/server/drpc.go b/src/control/server/drpc.go index ec621437d6e..abea5ba4867 100644 --- a/src/control/server/drpc.go +++ b/src/control/server/drpc.go @@ -92,7 +92,7 @@ func drpcServerSetup(ctx context.Context, req *drpcServerSetupReq) error { // Create and add our modules 
drpcServer.RegisterRPCModule(NewSecurityModule(req.log, req.tc)) drpcServer.RegisterRPCModule(newMgmtModule()) - drpcServer.RegisterRPCModule(newSrvModule(req.log, req.sysdb, req.engines, req.events)) + drpcServer.RegisterRPCModule(newSrvModule(req.log, req.sysdb, req.sysdb, req.engines, req.events)) if err := drpcServer.Start(ctx); err != nil { return errors.Wrapf(err, "unable to start socket server on %s", sockPath) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index 16565dde749..150b0c5d2a2 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -261,6 +261,7 @@ type Config struct { Index uint32 `yaml:"-" cmdLongFlag:"--instance_idx" cmdShortFlag:"-I"` MemSize int `yaml:"-" cmdLongFlag:"--mem_size" cmdShortFlag:"-r"` HugepageSz int `yaml:"-" cmdLongFlag:"--hugepage_size" cmdShortFlag:"-H"` + CheckerEnabled bool `yaml:"-" cmdLongFlag:"--checker" cmdShortFlag:"-C"` } // NewConfig returns an I/O Engine config. diff --git a/src/control/server/harness.go b/src/control/server/harness.go index af8eb206ce5..343d5d5b01f 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -61,6 +61,7 @@ type Engine interface { OnInstanceExit(...onInstanceExitFn) OnReady(...onReadyFn) GetStorage() *storage.Provider + SetCheckerMode(bool) Debugf(format string, args ...interface{}) Tracef(format string, args ...interface{}) } diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 0aa83419c2b..215b8c424b5 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -157,6 +157,12 @@ func (ei *EngineInstance) Index() uint32 { return ei.runner.GetConfig().Index } +// SetCheckerMode adjusts the engine configuration to enable or disable +// starting the engine in checker mode. 
+func (ei *EngineInstance) SetCheckerMode(enabled bool) { + ei.runner.GetConfig().CheckerEnabled = enabled +} + // removeSocket removes the socket file used for dRPC communication with // harness and updates relevant ready states. func (ei *EngineInstance) removeSocket() error { @@ -199,6 +205,7 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify FaultDomain: ei.hostFaultDomain, InstanceIdx: ei.Index(), Incarnation: ready.GetIncarnation(), + CheckMode: ready.GetCheckMode(), } resp, err := ei.joinSystem(ctx, joinReq) @@ -209,6 +216,16 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify switch resp.State { case system.MemberStateAdminExcluded, system.MemberStateExcluded: return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) + case system.MemberStateCheckerStarted: + // If the system is in checker mode but the rank was not started in + // checker mode, we need to restart it in order to get the correct + // modules loaded. 
+ if !ready.GetCheckMode() { + ei.log.Noticef("restarting rank %d in checker mode", resp.Rank) + go ei.requestStart(context.Background()) + ei.SetCheckerMode(true) + return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d restarting to enable checker", resp.Rank) + } } r = ranklist.Rank(resp.Rank) diff --git a/src/control/server/instance_test.go b/src/control/server/instance_test.go index 2792c95facc..e4ec0ad84fb 100644 --- a/src/control/server/instance_test.go +++ b/src/control/server/instance_test.go @@ -161,6 +161,7 @@ type ( Index uint32 Started atm.Bool Ready atm.Bool + CheckerMode atm.Bool LocalState system.MemberState RemoveSuperblockErr error SetupRankErr error @@ -188,6 +189,10 @@ func DefaultMockInstance() *MockInstance { return NewMockInstance(nil) } +func (mi *MockInstance) SetCheckerMode(enabled bool) { + mi.cfg.CheckerMode.Store(enabled) +} + func (mi *MockInstance) CallDrpc(_ context.Context, _ drpc.Method, _ proto.Message) (*drpc.Response, error) { return mi.cfg.CallDrpcResp, mi.cfg.CallDrpcErr } diff --git a/src/control/server/mgmt_check.go b/src/control/server/mgmt_check.go new file mode 100644 index 00000000000..606f4e2dad4 --- /dev/null +++ b/src/control/server/mgmt_check.go @@ -0,0 +1,536 @@ +// +// (C) Copyright 2022-2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package server + +import ( + "context" + "encoding/json" + "sort" + "strings" + + "github.com/pkg/errors" + "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/common" + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/drpc" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" +) + +const ( + checkerEnabledKey = "checker_enabled" + checkerPoliciesKey = "checker_policies" + checkerLatestPolicyKey = "checker_latest_policy" +) + +var errNoSavedPolicies = errors.New("no previous policies have been saved") + +func (svc *mgmtSvc) enableChecker() error { + if err := system.SetMgmtProperty(svc.sysdb, checkerEnabledKey, "true"); err != nil { + return errors.Wrap(err, "failed to enable checker") + } + return nil +} + +func (svc *mgmtSvc) disableChecker() error { + if err := system.SetMgmtProperty(svc.sysdb, checkerEnabledKey, "false"); err != nil { + return errors.Wrap(err, "failed to disable checker") + } + return nil +} + +func (svc *mgmtSvc) checkerIsEnabled() bool { + value, err := system.GetMgmtProperty(svc.sysdb, checkerEnabledKey) + if err != nil { + if !system.IsNotLeader(err) && !system.IsErrSystemAttrNotFound(err) { + svc.log.Errorf("failed to get checker enabled value: %s", err) + } + return false + } + return value == "true" +} + +// checkerRequest is a wrapper around a request that is made on behalf of +// the checker or is otherwise allowed to be made while the checker is enabled. 
+type checkerRequest struct {
+	// Embedded so that the wrapper itself can be passed wherever a
+	// proto.Message is expected.
+	proto.Message
+}
+
+// wrapCheckerReq wraps req in a checkerRequest so that unwrapCheckerReq will
+// let it through regardless of whether the checker is enabled. A nil request
+// (as determined by common.InterfaceIsNil) is returned as nil rather than
+// being wrapped.
+func wrapCheckerReq(req proto.Message) proto.Message {
+	if common.InterfaceIsNil(req) {
+		return nil
+	}
+	return &checkerRequest{req}
+}
+
+// unwrapCheckerReq returns the request that should actually be processed.
+//
+// If req is a *checkerRequest, the wrapped message is returned. Otherwise the
+// request is not a checker request: it is rejected with
+// checker.FaultCheckerEnabled while the checker is enabled, and returned
+// unchanged when the checker is disabled.
+func (svc *mgmtSvc) unwrapCheckerReq(req proto.Message) (proto.Message, error) {
+	cr, ok := req.(*checkerRequest)
+	if ok {
+		return cr.Message, nil
+	}
+
+	if svc.checkerIsEnabled() {
+		return nil, checker.FaultCheckerEnabled
+	}
+
+	return req, nil
+}
+
+// makeCheckerCall forwards a checker request to the engine harness via dRPC.
+//
+// The request must first pass checkLeaderRequest (wrapped as a checker
+// request) and verifyCheckerReady; the first failing check's error is
+// returned and no dRPC is issued.
+func (svc *mgmtSvc) makeCheckerCall(ctx context.Context, method drpc.Method, req proto.Message) (*drpc.Response, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	if err := svc.verifyCheckerReady(); err != nil {
+		return nil, err
+	}
+
+	// Note that the unwrapped request is what gets sent to the engine.
+	return svc.harness.CallDrpc(ctx, method, req)
+}
+
+// verifyCheckerReady returns checker.FaultCheckerNotEnabled if the checker is
+// not enabled, or the error from checkMemberStates if the member state check
+// fails. It returns nil when both checks pass.
+func (svc *mgmtSvc) verifyCheckerReady() error {
+	if !svc.checkerIsEnabled() {
+		return checker.FaultCheckerNotEnabled
+	}
+
+	// NOTE(review): presumably checkMemberStates requires every member to be
+	// in one of the listed states -- confirm against its definition.
+	if err := svc.checkMemberStates(
+		system.MemberStateAdminExcluded,
+		system.MemberStateCheckerStarted,
+	); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// poolCheckerReq is a checker request that carries a list of pool IDs.
+type poolCheckerReq interface {
+	proto.Message
+	GetUuids() []string
+}
+
+// makePoolCheckerCall resolves the pool IDs in req and then issues the
+// request through makeCheckerCall.
+//
+// Each ID returned by req.GetUuids() is passed through resolvePoolID and
+// replaced by the resulting UUID string; the first resolution failure aborts
+// the call. The request is modified in place: Uuids is overwritten for start,
+// stop and query requests, and for start requests Ranks is also overwritten
+// with the ranks currently in the CheckerStarted state. Any other request
+// type is rejected with an error.
+func (svc *mgmtSvc) makePoolCheckerCall(ctx context.Context, method drpc.Method, req poolCheckerReq) (*drpc.Response, error) {
+	poolUuids := make([]string, len(req.GetUuids()))
+	for i, id := range req.GetUuids() {
+		uuid, err := svc.resolvePoolID(id)
+		if err != nil {
+			return nil, err
+		}
+		poolUuids[i] = uuid.String()
+	}
+
+	switch r := req.(type) {
+	case *mgmtpb.CheckStartReq:
+		checkRanks, err := svc.sysdb.MemberRanks(system.MemberStateCheckerStarted)
+		if err != nil {
+			return nil, err
+		}
+
+		r.Ranks = ranklist.RanksToUint32(checkRanks)
+		r.Uuids = poolUuids
+	case *mgmtpb.CheckStopReq:
+		r.Uuids = poolUuids
+	case *mgmtpb.CheckQueryReq:
+		r.Uuids = poolUuids
+	default:
+		return nil, errors.Errorf("unexpected request type %T", req)
+	}
+
+	return svc.makeCheckerCall(ctx, method, req)
+}
+
+// startSystemRanks issues a SystemStart request for every rank found in the
+// system group map. The request's CheckMode is set from the current
+// checker-enabled state at the time of the call.
+func (svc *mgmtSvc) startSystemRanks(ctx context.Context, sys string) error {
+	// Use the group membership to determine the set of ranks
+	// that are available to be started.
+	gm, err := svc.sysdb.GroupMap()
+	if err != nil {
+		return errors.Wrap(err, "failed to get group map")
+	}
+	availRanks := ranklist.NewRankSet()
+	for rank := range gm.RankEntries {
+		availRanks.Add(rank)
+	}
+
+	// Finally, restart all of the ranks so that they join in
+	// checker mode.
+	startReq := &mgmtpb.SystemStartReq{
+		Sys:       sys,
+		CheckMode: svc.checkerIsEnabled(),
+		Ranks:     availRanks.String(),
+	}
+	if _, err := svc.SystemStart(ctx, startReq); err != nil {
+		return errors.Wrap(err, "failed to start all ranks")
+	}
+
+	return nil
+}
+
+// SystemCheckEnable puts the system in checker mode.
+//
+// If the checker is already enabled, a response with status daos.Already is
+// returned and nothing else is done. Otherwise the member states are checked
+// against AdminExcluded/Stopped, the enabled flag is persisted, and the ranks
+// are started via startSystemRanks.
+func (svc *mgmtSvc) SystemCheckEnable(ctx context.Context, req *mgmtpb.CheckEnableReq) (*mgmtpb.DaosResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	if svc.checkerIsEnabled() {
+		return &mgmtpb.DaosResp{Status: int32(daos.Already)}, nil
+	}
+
+	if err := svc.checkMemberStates(
+		system.MemberStateAdminExcluded,
+		system.MemberStateStopped,
+	); err != nil {
+		return nil, err
+	}
+
+	// The flag is persisted before the ranks are started because
+	// startSystemRanks reads it to populate CheckMode.
+	if err := svc.enableChecker(); err != nil {
+		return nil, err
+	}
+
+	if err := svc.startSystemRanks(ctx, req.Sys); err != nil {
+		return nil, err
+	}
+
+	return &mgmtpb.DaosResp{}, nil
+}
+
+// SystemCheckDisable turns off checker mode for the system.
+//
+// If the checker is not enabled, a response with status daos.Already is
+// returned and nothing else is done. Otherwise the enabled flag is cleared
+// first and then a non-forced SystemStop is issued for the ranks in the
+// CheckerStarted state. Note that the flag stays cleared even if looking up
+// or stopping the ranks subsequently fails.
+func (svc *mgmtSvc) SystemCheckDisable(ctx context.Context, req *mgmtpb.CheckDisableReq) (*mgmtpb.DaosResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	if !svc.checkerIsEnabled() {
+		return &mgmtpb.DaosResp{Status: int32(daos.Already)}, nil
+	}
+
+	if err := svc.disableChecker(); err != nil {
+		return nil, err
+	}
+
+	// Stop all of the ranks that are currently running in checker mode.
+	checkRanks, err := svc.sysdb.MemberRanks(system.MemberStateCheckerStarted)
+	if err != nil {
+		return nil, err
+	}
+	stopReq := &mgmtpb.SystemStopReq{
+		Sys: req.Sys,
+		// Do not force stop system, it may cause resource leak and fail next system start.
+		Force: false,
+		Ranks: ranklist.RankSetFromRanks(checkRanks).String(),
+	}
+	if _, err := svc.SystemStop(ctx, stopReq); err != nil {
+		return nil, errors.Wrap(err, "failed to stop all checker ranks")
+	}
+
+	return &mgmtpb.DaosResp{}, nil
+}
+
+// SystemCheckStart starts a system check. The checker must be explicitly enabled to successfully
+// start a check.
+//
+// The policies in the request are merged over the stored policy map and the
+// merged list replaces req.Policies. The merged list is also recorded as the
+// "last used" policies; a failure to record it is logged but does not fail
+// the request. Note that this happens before the dRPC is attempted, so the
+// last-used policies are updated even if the call to the engine fails.
+//
+// A positive status in the engine's response causes stored checker findings
+// to be cleared -- all of them when the request named no pools, otherwise
+// only those for the requested pools -- after which the status is reset to 0.
+func (svc *mgmtSvc) SystemCheckStart(ctx context.Context, req *mgmtpb.CheckStartReq) (*mgmtpb.CheckStartResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	policies, err := svc.mergePoliciesWithCurrent(req.Policies)
+	if err != nil {
+		return nil, err
+	}
+	req.Policies = policies
+
+	// Best effort: the check proceeds even if the policies can't be saved.
+	if err := svc.setLastPoliciesUsed(req.Policies); err != nil {
+		svc.log.Errorf("failed to save the policies used: %s", err.Error())
+	}
+
+	dResp, err := svc.makePoolCheckerCall(ctx, drpc.MethodCheckerStart, req)
+	if err != nil {
+		return nil, err
+	}
+
+	resp := new(mgmtpb.CheckStartResp)
+	if err := proto.Unmarshal(dResp.Body, resp); err != nil {
+		return nil, errors.Wrap(err, "unmarshal CheckStart response")
+	}
+
+	if resp.Status > 0 {
+		// req.Uuids holds the resolved pool UUIDs at this point, since
+		// makePoolCheckerCall rewrote them in place.
+		if len(req.Uuids) == 0 {
+			svc.log.Debug("resetting checker findings DB")
+			if err := svc.sysdb.ResetCheckerData(); err != nil {
+				return nil, errors.Wrap(err, "failed to reset checker finding database")
+			}
+		} else {
+			pools := strings.Join(req.Uuids, ", ")
+			svc.log.Debugf("removing old checker findings for pools: %s", pools)
+			if err := svc.sysdb.RemoveCheckerFindingsForPools(req.Uuids...); err != nil {
+				return nil, errors.Wrapf(err, "failed to remove old findings for pools: %s", pools)
+			}
+		}
+		resp.Status = 0 // reset status to indicate success
+	}
+
+	return resp, nil
+}
+
+// mergePoliciesWithCurrent overlays the given policies on the stored checker
+// policy map and returns the combined set as a sorted slice. A policy in the
+// argument replaces the stored policy for the same inconsistency class.
+func (svc *mgmtSvc) mergePoliciesWithCurrent(policies []*mgmtpb.CheckInconsistPolicy) ([]*mgmtpb.CheckInconsistPolicy, error) {
+	merged, err := svc.getCheckerPolicyMap()
+	if err != nil {
+		return nil, err
+	}
+
+	// Requested policies take precedence over whatever is stored.
+	for idx := range policies {
+		requested := policies[idx]
+		merged[requested.InconsistCas] = requested
+	}
+
+	return merged.ToSlice(), nil
+}
+
+// setLastPoliciesUsed stores the JSON encoding of polList under
+// checkerLatestPolicyKey in the system database.
+func (svc *mgmtSvc) setLastPoliciesUsed(polList []*mgmtpb.CheckInconsistPolicy) error {
+	encoded, err := json.Marshal(polList)
+	if err == nil {
+		return system.SetMgmtProperty(svc.sysdb, checkerLatestPolicyKey, string(encoded))
+	}
+
+	return errors.Wrap(err, "failed to marshal latest checker policies")
+}
+
+// SystemCheckStop stops a running system check.
+//
+// The request is forwarded to the engine as a pool checker call and the
+// engine's response body is decoded into the returned CheckStopResp.
+func (svc *mgmtSvc) SystemCheckStop(ctx context.Context, req *mgmtpb.CheckStopReq) (*mgmtpb.CheckStopResp, error) {
+	leaderErr := svc.checkLeaderRequest(wrapCheckerReq(req))
+	if leaderErr != nil {
+		return nil, leaderErr
+	}
+
+	engineResp, callErr := svc.makePoolCheckerCall(ctx, drpc.MethodCheckerStop, req)
+	if callErr != nil {
+		return nil, callErr
+	}
+
+	stopResp := &mgmtpb.CheckStopResp{}
+	decodeErr := proto.Unmarshal(engineResp.Body, stopResp)
+	if decodeErr != nil {
+		return nil, errors.Wrap(decodeErr, "unmarshal CheckStop response")
+	}
+
+	return stopResp, nil
+}
+
+// SystemCheckQuery queries the state of the checker. This will indicate all known findings, as
+// well as the running state.
+func (svc *mgmtSvc) SystemCheckQuery(ctx context.Context, req *mgmtpb.CheckQueryReq) (*mgmtpb.CheckQueryResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	resp := new(mgmtpb.CheckQueryResp)
+	// Asking for specific findings by sequence number forces a shallow
+	// query, i.e. one that is answered without calling the engine.
+	if len(req.GetSeqs()) > 0 {
+		req.Shallow = true
+	}
+
+	if !req.Shallow {
+		dResp, err := svc.makePoolCheckerCall(ctx, drpc.MethodCheckerQuery, req)
+		if err != nil {
+			return nil, err
+		}
+
+		if err = proto.Unmarshal(dResp.Body, resp); err != nil {
+			return nil, errors.Wrap(err, "unmarshal CheckQuery response")
+		}
+	}
+
+	// The findings stored in the system database are appended to whatever
+	// reports the engine returned (if it was queried at all).
+	cfList, err := svc.sysdb.GetCheckerFindings(req.GetSeqs()...)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, f := range cfList {
+		resp.Reports = append(resp.Reports, &f.CheckReport)
+	}
+
+	return resp, nil
+}
+
+// policyMap holds at most one checker policy per inconsistency class.
+type policyMap map[chkpb.CheckInconsistClass]*mgmtpb.CheckInconsistPolicy
+
+// ToSlice returns a sorted slice of policies from the map.
+//
+// With no arguments, every policy in the map is returned. When classes are
+// given, only the policies for those classes are returned; classes that have
+// no entry in the map are skipped. The result is ordered by inconsistency
+// class and is never nil.
+func (pm policyMap) ToSlice(classes ...chkpb.CheckInconsistClass) []*mgmtpb.CheckInconsistPolicy {
+	policies := []*mgmtpb.CheckInconsistPolicy{}
+
+	if len(classes) > 0 {
+		for _, cls := range classes {
+			if pol, found := pm[cls]; found {
+				policies = append(policies, pol)
+			}
+		}
+	} else {
+		for _, pol := range pm {
+			policies = append(policies, pol)
+		}
+	}
+
+	// Map iteration order is random, so sort for a stable result.
+	sort.Slice(policies, func(i, j int) bool {
+		return policies[i].InconsistCas < policies[j].InconsistCas
+	})
+
+	return policies
+}
+
+// getCheckerPolicyMap returns the stored checker policy map.
+//
+// If no policies have been stored yet, the default policy map is returned
+// and an attempt is made to store it; a failure to store the defaults is
+// logged but not returned. Any other failure to read the stored policies is
+// returned as an error.
+func (svc *mgmtSvc) getCheckerPolicyMap() (policyMap, error) {
+	if pm, err := svc.getCheckerPolicyMapWithKey(checkerPoliciesKey); err == nil {
+		return pm, nil
+	} else if !system.IsErrSystemAttrNotFound(err) {
+		return nil, errors.Wrap(err, "failed to get checker policies map")
+	}
+
+	// No policies have been set
+	pm := svc.defaultPolicyMap()
+
+	if err := svc.setCheckerPolicyMap(pm.ToSlice()); err != nil {
+		svc.log.Errorf("failed to set default policies: %s", err.Error())
+	}
+	return pm, nil
+}
+
+// getCheckerPolicyMapWithKey reads the management property stored under key,
+// decodes it as a JSON list of policies, and returns the policies indexed by
+// inconsistency class. The property lookup error is returned unwrapped so
+// that callers can test it for "attribute not found".
+func (svc *mgmtSvc) getCheckerPolicyMapWithKey(key string) (policyMap, error) {
+	var polList []*mgmtpb.CheckInconsistPolicy
+	polStr, err := system.GetMgmtProperty(svc.sysdb, key)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := json.Unmarshal([]byte(polStr), &polList); err != nil {
+		return nil, errors.Wrap(err, "failed to unmarshal checker policies map")
+	}
+
+	pm := make(policyMap)
+
+	for _, pol := range polList {
+		pm[pol.InconsistCas] = pol
+	}
+
+	return pm, nil
+}
+
+// defaultPolicyMap builds a policy map that assigns the CIA_DEFAULT action
+// to every known inconsistency class except CIC_NONE and CIC_UNKNOWN.
+func (svc *mgmtSvc) defaultPolicyMap() policyMap {
+	pm := make(policyMap)
+	for cicEnum := range chkpb.CheckInconsistClass_name {
+		cic := chkpb.CheckInconsistClass(cicEnum)
+		if cic == chkpb.CheckInconsistClass_CIC_NONE || cic == chkpb.CheckInconsistClass_CIC_UNKNOWN {
+			continue
+		}
+		pm[cic] = &mgmtpb.CheckInconsistPolicy{
+			InconsistCas: cic,
+			InconsistAct: chkpb.CheckInconsistAction_CIA_DEFAULT,
+		}
+	}
+	return pm
+}
+
+// getLastPoliciesUsed returns the policies recorded by setLastPoliciesUsed.
+//
+// errNoSavedPolicies is returned when nothing has been recorded yet. Any
+// other failure to read or decode the stored value is returned to the caller
+// rather than being reported as a nil map with a nil error.
+func (svc *mgmtSvc) getLastPoliciesUsed() (policyMap, error) {
+	pm, err := svc.getCheckerPolicyMapWithKey(checkerLatestPolicyKey)
+	if system.IsErrSystemAttrNotFound(err) {
+		return nil, errNoSavedPolicies
+	}
+	// pm is nil whenever err is non-nil, so this covers both outcomes.
+	return pm, err
+}
+
+// SystemCheckGetPolicy fetches the policies for the system checker.
+//
+// When req.LastUsed is set, the policies recorded by the most recent check
+// start are returned, falling back to the current policy map if none have
+// been recorded. Otherwise the current policy map is returned. In both cases
+// the result is limited to req.Classes when any are given.
+func (svc *mgmtSvc) SystemCheckGetPolicy(ctx context.Context, req *mgmtpb.CheckGetPolicyReq) (*mgmtpb.CheckGetPolicyResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	if err := svc.verifyCheckerReady(); err != nil {
+		return nil, err
+	}
+
+	resp := new(mgmtpb.CheckGetPolicyResp)
+
+	var pm policyMap
+	var err error
+	if req.LastUsed {
+		pm, err = svc.getLastPoliciesUsed()
+		if errors.Is(err, errNoSavedPolicies) {
+			pm, err = svc.getCheckerPolicyMap()
+		}
+	} else {
+		pm, err = svc.getCheckerPolicyMap()
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	resp.Policies = pm.ToSlice(req.Classes...)
+
+	return resp, nil
+}
+
+// setCheckerPolicyMap stores the JSON encoding of polList under
+// checkerPoliciesKey in the system database.
+func (svc *mgmtSvc) setCheckerPolicyMap(polList []*mgmtpb.CheckInconsistPolicy) error {
+	polStr, err := json.Marshal(polList)
+	if err != nil {
+		return errors.Wrap(err, "failed to marshal checker policies map")
+	}
+
+	return system.SetMgmtProperty(svc.sysdb, checkerPoliciesKey, string(polStr))
+}
+
+// SystemCheckSetPolicy sets checker policies in the policy map.
+//
+// The policies in the request are merged over the stored policy map, so
+// classes that are not mentioned in the request keep their current policy.
+func (svc *mgmtSvc) SystemCheckSetPolicy(ctx context.Context, req *mgmtpb.CheckSetPolicyReq) (*mgmtpb.DaosResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	if err := svc.verifyCheckerReady(); err != nil {
+		return nil, err
+	}
+
+	policies, err := svc.mergePoliciesWithCurrent(req.Policies)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := svc.setCheckerPolicyMap(policies); err != nil {
+		return nil, err
+	}
+
+	return &mgmtpb.DaosResp{}, nil
+}
+
+// SystemCheckRepair repairs a previous checker finding.
+//
+// The finding identified by req.Seq must exist and must offer req.Act as one
+// of its choices, otherwise an error is returned without contacting the
+// engine. The chosen action is recorded against the finding only when the
+// engine's response reports status 0.
+func (svc *mgmtSvc) SystemCheckRepair(ctx context.Context, req *mgmtpb.CheckActReq) (*mgmtpb.CheckActResp, error) {
+	if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil {
+		return nil, err
+	}
+
+	f, err := svc.sysdb.GetCheckerFinding(req.Seq)
+	if err != nil {
+		return nil, err
+	}
+
+	if !f.HasChoice(req.Act) {
+		return nil, errors.Errorf("invalid action %s (must be one of %s)", req.Act, f.ValidChoicesString())
+	}
+
+	dResp, err := svc.makeCheckerCall(ctx, drpc.MethodCheckerAction, req)
+	if err != nil {
+		return nil, err
+	}
+
+	resp := new(mgmtpb.CheckActResp)
+	if err = proto.Unmarshal(dResp.Body, resp); err != nil {
+		return nil, errors.Wrap(err, "unmarshal CheckRepair response")
+	}
+
+	if resp.Status == 0 {
+		if err := svc.sysdb.SetCheckerFindingAction(req.Seq, int32(req.Act)); err != nil {
+			return nil, err
+		}
+		svc.log.Debugf("Set action %s for finding %d", req.Act, req.Seq)
+	}
+
+	return resp, nil
+}
diff --git a/src/control/server/mgmt_check_test.go
b/src/control/server/mgmt_check_test.go new file mode 100644 index 00000000000..0465dec5e19 --- /dev/null +++ b/src/control/server/mgmt_check_test.go @@ -0,0 +1,643 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package server + +import ( + "net" + "sort" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" + "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/common/proto/chk" + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/common/proto/mgmt" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" + "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" + "github.com/daos-stack/daos/src/control/system/raft" +) + +var defaultPolicies = testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_DEFAULT) + +func testPoliciesWithAction(action chkpb.CheckInconsistAction) []*mgmtpb.CheckInconsistPolicy { + policies := make([]*mgmtpb.CheckInconsistPolicy, 0, len(chkpb.CheckInconsistClass_name)) + + for cls := range chkpb.CheckInconsistClass_name { + checkCls := chkpb.CheckInconsistClass(cls) + if checkCls == chkpb.CheckInconsistClass_CIC_NONE || checkCls == chkpb.CheckInconsistClass_CIC_UNKNOWN { + continue + } + policies = append(policies, &mgmtpb.CheckInconsistPolicy{ + InconsistCas: checkCls, + InconsistAct: action, + }) + } + + sort.Slice(policies, func(i, j int) bool { return policies[i].InconsistCas < policies[j].InconsistCas }) + return policies +} + +func testSvcWithMemberState(t *testing.T, log logging.Logger, state system.MemberState, testPoolUUIDs []string) *mgmtSvc { + t.Helper() + + 
t.Logf("creating a test MS with member state %s", state) + + svc := newTestMgmtSvc(t, log) + addTestPools(t, svc.sysdb, testPoolUUIDs...) + + updateTestMemberState(t, svc, state) + return svc +} + +func updateTestMemberState(t *testing.T, svc *mgmtSvc, state system.MemberState) { + members, err := svc.sysdb.AllMembers() + if err != nil { + t.Fatal(err) + } + for _, m := range members { + m.State = state + if err := svc.sysdb.UpdateMember(m); err != nil { + t.Fatal(err) + } + } +} + +func testSvcCheckerEnabled(t *testing.T, log logging.Logger, state system.MemberState, testPoolUUIDs []string) *mgmtSvc { + t.Helper() + + svc := testSvcWithMemberState(t, log, state, testPoolUUIDs) + if err := svc.enableChecker(); err != nil { + t.Fatal(err) + } + return svc +} + +func testPoolUUIDs(numTestPools int) []string { + uuids := []string{} + for i := 0; i < numTestPools; i++ { + uuids = append(uuids, test.MockPoolUUID(int32(i+1)).String()) + } + return uuids +} + +func mergeTestPolicies(current, merge []*mgmtpb.CheckInconsistPolicy) []*mgmtpb.CheckInconsistPolicy { + polMap := make(policyMap) + for _, cur := range current { + polMap[cur.InconsistCas] = cur + } + for _, toMerge := range merge { + polMap[toMerge.InconsistCas] = toMerge + } + return polMap.ToSlice() +} + +func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { + specificPolicies := []*mgmtpb.CheckInconsistPolicy{ + { + InconsistCas: chk.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + InconsistAct: chkpb.CheckInconsistAction_CIA_IGNORE, + }, + { + InconsistCas: chk.CheckInconsistClass_CIC_CONT_BAD_LABEL, + InconsistAct: chkpb.CheckInconsistAction_CIA_INTERACT, + }, + } + testPolicies := testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_INTERACT) + + uuids := testPoolUUIDs(3) + testFindings := func() []*checker.Finding { + findings := []*checker.Finding{} + for i, uuid := range uuids { + f := &checker.Finding{CheckReport: chkpb.CheckReport{ + Seq: uint64(i + 1), + PoolUuid: uuid, + }} + findings = 
append(findings, f) + } + return findings + } + + for name, tc := range map[string]struct { + createMS func(*testing.T, logging.Logger) *mgmtSvc + setupDrpc func(*testing.T, *mgmtSvc) + req *mgmtpb.CheckStartReq + expResp *mgmtpb.CheckStartResp + expErr error + expFindings []*checker.Finding + expPolicies []*mgmtpb.CheckInconsistPolicy + }{ + "checker is not enabled": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcWithMemberState(t, log, system.MemberStateStopped, uuids) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expErr: checker.FaultCheckerNotEnabled, + }, + "bad member states": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateJoined, uuids) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expErr: errors.New("expected states"), + }, + "corrupted policy map": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := system.SetMgmtProperty(svc.sysdb, checkerPoliciesKey, "garbage"); err != nil { + t.Fatal(err) + } + return svc + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expErr: errors.New("unmarshal checker policies"), + }, + "dRPC fails": { + setupDrpc: func(t *testing.T, ms *mgmtSvc) { + setupMockDrpcClient(ms, nil, errors.New("mock dRPC")) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expErr: errors.New("mock dRPC"), + expFindings: testFindings(), + expPolicies: testPolicies, + }, + "bad resp": { + setupDrpc: func(t *testing.T, ms *mgmtSvc) { + setupMockDrpcClientBytes(ms, []byte("garbage"), nil) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expErr: errors.New("unmarshal CheckStart response"), + expFindings: testFindings(), + expPolicies: testPolicies, + }, + "request failed": { + setupDrpc: func(t *testing.T, ms *mgmtSvc) { + setupMockDrpcClient(ms, 
&mgmt.CheckStartResp{Status: int32(daos.MiscError)}, nil) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expResp: &mgmt.CheckStartResp{Status: int32(daos.MiscError)}, + expFindings: testFindings(), + expPolicies: testPolicies, + }, + "no reset": { + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckStartResp{}, + expFindings: testFindings(), + expPolicies: testPolicies, + }, + "reset": { + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + Flags: uint32(chkpb.CheckFlag_CF_RESET), + }, + setupDrpc: func(t *testing.T, ms *mgmtSvc) { + // engine returns status > 0 to indicate reset + setupMockDrpcClient(ms, &mgmt.CheckStartResp{Status: 1}, nil) + }, + expResp: &mgmtpb.CheckStartResp{}, + expFindings: []*checker.Finding{}, + expPolicies: testPolicies, + }, + "reset specific pools": { + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + Flags: uint32(chkpb.CheckFlag_CF_RESET), + Uuids: []string{uuids[0], uuids[2]}, + }, + setupDrpc: func(t *testing.T, ms *mgmtSvc) { + // engine returns status > 0 to indicate reset + setupMockDrpcClient(ms, &mgmt.CheckStartResp{Status: 1}, nil) + }, + expResp: &mgmtpb.CheckStartResp{}, + expFindings: []*checker.Finding{ + { + CheckReport: chkpb.CheckReport{ + Seq: 2, + PoolUuid: uuids[1], + }, + }, + }, + expPolicies: testPolicies, + }, + "no policy map": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckStartResp{}, + expPolicies: defaultPolicies, + }, + "req policies": { + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + Policies: specificPolicies, + }, + expResp: &mgmtpb.CheckStartResp{}, + expPolicies: mergeTestPolicies(testPolicies, specificPolicies), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.createMS == 
nil { + tc.createMS = func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := svc.setCheckerPolicyMap(testPolicies); err != nil { + t.Fatal(err) + } + for _, f := range testFindings() { + if err := svc.sysdb.AddCheckerFinding(f); err != nil { + t.Fatal(err) + } + } + return svc + } + } + svc := tc.createMS(t, log) + + if tc.setupDrpc == nil { + tc.setupDrpc = func(t *testing.T, ms *mgmtSvc) { + setupMockDrpcClient(ms, &mgmtpb.CheckStartResp{}, nil) + } + } + tc.setupDrpc(t, svc) + + resp, err := svc.SystemCheckStart(test.Context(t), tc.req) + + test.CmpErr(t, tc.expErr, err) + + if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported(mgmtpb.CheckStartResp{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + + if tc.expFindings != nil { + findings, err := svc.sysdb.GetCheckerFindings() + sort.Slice(findings, func(i, j int) bool { + return findings[i].Seq < findings[j].Seq + }) + test.CmpErr(t, nil, err) + if diff := cmp.Diff(tc.expFindings, findings, cmpopts.IgnoreUnexported(chkpb.CheckReport{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + } + + // Check contents of drpc payload + ei, ok := svc.harness.instances[0].(*EngineInstance) + if !ok { + t.Fatalf("bad engine instance type %T", svc.harness.instances[0]) + } + mockDrpc, ok := ei._drpcClient.(*mockDrpcClient) + if !ok { + t.Fatalf("bad drpc client type type %T", ei._drpcClient) + } + + drpcInput := new(mgmtpb.CheckStartReq) + calls := mockDrpc.calls.get() + if len(calls) == 0 { + return + } + + if err := proto.Unmarshal(mockDrpc.calls.get()[0].Body, drpcInput); err != nil { + t.Fatal(err) + } + + // ensure the slices are in the same order + sort.Slice(tc.expPolicies, func(i, j int) bool { return tc.expPolicies[i].InconsistCas < tc.expPolicies[j].InconsistCas }) + sort.Slice(drpcInput.Policies, func(i, j int) bool { return drpcInput.Policies[i].InconsistCas < drpcInput.Policies[j].InconsistCas }) + 
if diff := cmp.Diff(tc.expPolicies, drpcInput.Policies, cmpopts.IgnoreUnexported(mgmtpb.CheckInconsistPolicy{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + + // last used policies should be set + lastPM, err := svc.getLastPoliciesUsed() + if err != nil { + t.Fatal(err) + } + lastPol := lastPM.ToSlice() + if diff := cmp.Diff(tc.expPolicies, lastPol, cmpopts.IgnoreUnexported(mgmtpb.CheckInconsistPolicy{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + }) + } +} + +func TestServer_mgmtSvc_SystemCheckGetPolicy(t *testing.T) { + uuids := testPoolUUIDs(4) + + for name, tc := range map[string]struct { + createMS func(*testing.T, logging.Logger) *mgmtSvc + req *mgmtpb.CheckGetPolicyReq + expResp *mgmtpb.CheckGetPolicyResp + expErr error + }{ + "not MS replica": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := newTestMgmtSvc(t, log) + svc.sysdb = raft.MockDatabaseWithCfg(t, log, &raft.DatabaseConfig{ + SystemName: build.DefaultSystemName, + Replicas: []*net.TCPAddr{{IP: net.IP{111, 222, 1, 1}}}, + }) + return svc + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expErr: errors.New("replica"), + }, + "checker is not enabled": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcWithMemberState(t, log, system.MemberStateCheckerStarted, uuids) + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expErr: checker.FaultCheckerNotEnabled, + }, + "bad member states": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateJoined, uuids) + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expErr: errors.New("expected states"), + }, + "corrupted policy map": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := system.SetMgmtProperty(svc.sysdb, checkerPoliciesKey, "garbage"); err != nil { + 
t.Fatal(err) + } + return svc + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expErr: errors.New("unmarshal checker policies"), + }, + "default policies": { + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: defaultPolicies, + }, + }, + "requested classes": { + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + Classes: []chkpb.CheckInconsistClass{ + chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + }, + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: []*mgmtpb.CheckInconsistPolicy{ + { + InconsistCas: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + InconsistAct: chkpb.CheckInconsistAction_CIA_DEFAULT, + }, + { + InconsistCas: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + InconsistAct: chkpb.CheckInconsistAction_CIA_DEFAULT, + }, + }, + }, + }, + "non-default policies": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := svc.setCheckerPolicyMap(testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_INTERACT)); err != nil { + t.Fatal(err) + } + return svc + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_INTERACT), + }, + }, + "latest policy": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + latestPolicies := testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_TRUST_MS) + if err := svc.setLastPoliciesUsed(latestPolicies); err != nil { + t.Fatal(err) + } + return svc + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + LastUsed: true, + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_TRUST_MS), + }, + }, + "no latest policy saved": { 
+ req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + LastUsed: true, + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_DEFAULT), + }, + }, + "latest policy with requested classes": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + latestPolicies := testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_TRUST_MS) + if err := svc.setLastPoliciesUsed(latestPolicies); err != nil { + t.Fatal(err) + } + return svc + }, + req: &mgmtpb.CheckGetPolicyReq{ + Sys: "daos_server", + Classes: []chkpb.CheckInconsistClass{ + chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + }, + LastUsed: true, + }, + expResp: &mgmtpb.CheckGetPolicyResp{ + Policies: []*mgmtpb.CheckInconsistPolicy{ + { + InconsistCas: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + InconsistAct: chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + { + InconsistCas: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + InconsistAct: chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.createMS == nil { + tc.createMS = func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + } + } + svc := tc.createMS(t, log) + + resp, err := svc.SystemCheckGetPolicy(test.Context(t), tc.req) + + test.CmpErr(t, tc.expErr, err) + + if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported(mgmtpb.CheckGetPolicyResp{}, mgmtpb.CheckInconsistPolicy{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + }) + } +} + +func TestServer_mgmtSvc_SystemCheckSetPolicy(t *testing.T) { + uuids := testPoolUUIDs(4) + interactReq := &mgmtpb.CheckSetPolicyReq{ + Sys: "daos_server", + Policies: 
testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_INTERACT), + } + + for name, tc := range map[string]struct { + createMS func(*testing.T, logging.Logger) *mgmtSvc + req *mgmtpb.CheckSetPolicyReq + expResp *mgmtpb.DaosResp + expErr error + expPolicies []*mgmtpb.CheckInconsistPolicy + }{ + "not MS replica": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := newTestMgmtSvc(t, log) + svc.sysdb = raft.MockDatabaseWithCfg(t, log, &raft.DatabaseConfig{ + SystemName: build.DefaultSystemName, + Replicas: []*net.TCPAddr{{IP: net.IP{111, 222, 1, 1}}}, + }) + return svc + }, + req: interactReq, + expErr: errors.New("replica"), + }, + "checker is not enabled": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcWithMemberState(t, log, system.MemberStateCheckerStarted, uuids) + }, + req: interactReq, + expErr: checker.FaultCheckerNotEnabled, + }, + "bad member states": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateJoined, uuids) + }, + req: interactReq, + expErr: errors.New("expected states"), + }, + "corrupted policy map": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := system.SetMgmtProperty(svc.sysdb, checkerPoliciesKey, "garbage"); err != nil { + t.Fatal(err) + } + return svc + }, + req: interactReq, + expErr: errors.New("unmarshal checker policies"), + }, + "no policies in request": { + req: &mgmtpb.CheckSetPolicyReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.DaosResp{}, + expPolicies: testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_DEFAULT), + }, + "set all policies": { + req: interactReq, + expResp: &mgmtpb.DaosResp{}, + expPolicies: interactReq.Policies, + }, + "set single policy": { + req: &mgmtpb.CheckSetPolicyReq{ + Sys: "daos_server", + Policies: []*mgmtpb.CheckInconsistPolicy{ + { + InconsistCas: 
chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + InconsistAct: chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + }, + }, + expResp: &mgmtpb.DaosResp{}, + expPolicies: mergeTestPolicies(testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_DEFAULT), + []*mgmtpb.CheckInconsistPolicy{ + { + InconsistCas: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + InconsistAct: chkpb.CheckInconsistAction_CIA_TRUST_MS, + }, + }), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.createMS == nil { + tc.createMS = func(t *testing.T, log logging.Logger) *mgmtSvc { + return testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + } + } + svc := tc.createMS(t, log) + + resp, err := svc.SystemCheckSetPolicy(test.Context(t), tc.req) + + test.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported(mgmtpb.DaosResp{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + + if tc.expPolicies == nil { + return + } + + policies, err := svc.getCheckerPolicyMap() + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tc.expPolicies, policies.ToSlice(), cmpopts.IgnoreUnexported(mgmtpb.CheckInconsistPolicy{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + }) + } +} diff --git a/src/control/server/mgmt_drpc.go b/src/control/server/mgmt_drpc.go index 6476e3c3121..1f287964a5b 100644 --- a/src/control/server/mgmt_drpc.go +++ b/src/control/server/mgmt_drpc.go @@ -21,6 +21,8 @@ import ( "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" + "github.com/daos-stack/daos/src/control/system/raft" ) // mgmtModule represents the daos_server mgmt dRPC module. 
It sends dRPCs to @@ -42,35 +44,42 @@ func (mod *mgmtModule) ID() drpc.ModuleID { return drpc.ModuleMgmt } -// poolResolver defines an interface to be implemented by -// something that can resolve a pool ID into a PoolService. -type poolResolver interface { +// poolDatabase defines an interface to be implemented by +// a system pool database. +type poolDatabase interface { FindPoolServiceByLabel(string) (*system.PoolService, error) FindPoolServiceByUUID(uuid.UUID) (*system.PoolService, error) + PoolServiceList(bool) ([]*system.PoolService, error) + AddPoolService(context.Context, *system.PoolService) error + RemovePoolService(context.Context, uuid.UUID) error + UpdatePoolService(context.Context, *system.PoolService) error + TakePoolLock(context.Context, uuid.UUID) (*raft.PoolLock, error) } // srvModule represents the daos_server dRPC module. It handles dRPCs sent by // the daos_engine (src/engine). type srvModule struct { - log logging.Logger - sysdb poolResolver - engines []Engine - events *events.PubSub + log logging.Logger + poolDB poolDatabase + checkerDB checker.FindingStore + engines []Engine + events *events.PubSub } // newSrvModule creates a new srv module references to the system database, // resident EngineInstances and event publish subscribe reference. -func newSrvModule(log logging.Logger, sysdb poolResolver, engines []Engine, events *events.PubSub) *srvModule { +func newSrvModule(log logging.Logger, pdb poolDatabase, cdb checker.FindingStore, engines []Engine, events *events.PubSub) *srvModule { return &srvModule{ - log: log, - sysdb: sysdb, - engines: engines, - events: events, + log: log, + poolDB: pdb, + checkerDB: cdb, + engines: engines, + events: events, } } // HandleCall is the handler for calls to the srvModule. 
-func (mod *srvModule) HandleCall(_ context.Context, session *drpc.Session, method drpc.Method, req []byte) ([]byte, error) { +func (mod *srvModule) HandleCall(ctx context.Context, session *drpc.Session, method drpc.Method, req []byte) ([]byte, error) { switch method { case drpc.MethodNotifyReady: return nil, mod.handleNotifyReady(req) @@ -80,6 +89,14 @@ func (mod *srvModule) HandleCall(_ context.Context, session *drpc.Session, metho return mod.handlePoolFindByLabel(req) case drpc.MethodClusterEvent: return mod.handleClusterEvent(req) + case drpc.MethodCheckerListPools: + return mod.handleCheckerListPools(ctx, req) + case drpc.MethodCheckerRegisterPool: + return mod.handleCheckerRegisterPool(ctx, req) + case drpc.MethodCheckerDeregisterPool: + return mod.handleCheckerDeregisterPool(ctx, req) + case drpc.MethodCheckerReport: + return mod.handleCheckerReport(ctx, req) default: return nil, drpc.UnknownMethodFailure() } @@ -105,7 +122,7 @@ func (mod *srvModule) handleGetPoolServiceRanks(reqb []byte) ([]byte, error) { resp := new(srvpb.GetPoolSvcResp) - ps, err := mod.sysdb.FindPoolServiceByUUID(uuid) + ps, err := mod.poolDB.FindPoolServiceByUUID(uuid) if err != nil || ps.State != system.PoolServiceStateReady { resp.Status = int32(daos.Nonexistent) mod.log.Debugf("GetPoolSvcResp: %+v", resp) @@ -129,7 +146,7 @@ func (mod *srvModule) handlePoolFindByLabel(reqb []byte) ([]byte, error) { resp := new(srvpb.PoolFindByLabelResp) - ps, err := mod.sysdb.FindPoolServiceByLabel(req.GetLabel()) + ps, err := mod.poolDB.FindPoolServiceByLabel(req.GetLabel()) if err != nil || ps.State != system.PoolServiceStateReady { resp.Status = int32(daos.Nonexistent) mod.log.Debugf("PoolFindByLabelResp: %+v", resp) diff --git a/src/control/server/mgmt_drpc_checker.go b/src/control/server/mgmt_drpc_checker.go new file mode 100644 index 00000000000..7fb1ebe7efb --- /dev/null +++ b/src/control/server/mgmt_drpc_checker.go @@ -0,0 +1,232 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package server + +import ( + "context" + + "github.com/google/uuid" + "google.golang.org/protobuf/proto" + + srvpb "github.com/daos-stack/daos/src/control/common/proto/srv" + "github.com/daos-stack/daos/src/control/drpc" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" +) + +func (mod *srvModule) handleCheckerListPools(_ context.Context, reqb []byte) (out []byte, outErr error) { + // TODO: Remove if we never add request fields? + req := new(srvpb.CheckListPoolReq) + if err := proto.Unmarshal(reqb, req); err != nil { + return nil, drpc.UnmarshalingPayloadFailure() + } + mod.log.Debugf("handling CheckerListPools: %+v", req) + + resp := new(srvpb.CheckListPoolResp) + defer func() { + mod.log.Debugf("CheckerListPools resp: %+v", resp) + out, outErr = proto.Marshal(resp) + }() + + pools, err := mod.poolDB.PoolServiceList(true) + if err != nil { + mod.log.Errorf("failed to list pools: %s", err) + resp.Status = int32(daos.MiscError) + return + } + + for _, ps := range pools { + resp.Pools = append(resp.Pools, &srvpb.CheckListPoolResp_OnePool{ + Uuid: ps.PoolUUID.String(), + Label: ps.PoolLabel, + Svcreps: ranklist.RanksToUint32(ps.Replicas), + }) + } + + return +} + +func (mod *srvModule) handleCheckerRegisterPool(parent context.Context, reqb []byte) (out []byte, outErr error) { + req := new(srvpb.CheckRegPoolReq) + if err := proto.Unmarshal(reqb, req); err != nil { + return nil, drpc.UnmarshalingPayloadFailure() + } + mod.log.Debugf("handling CheckerRegisterPool: %+v", req) + + resp := new(srvpb.CheckRegPoolResp) + defer func() { + mod.log.Debugf("CheckerRegisterPool resp: %+v", resp) + out, outErr = proto.Marshal(resp) + }() + + poolUUID, err := uuid.Parse(req.Uuid) + if err != nil { + mod.log.Errorf("invalid pool UUID %q: %s", req.Uuid, err) + 
resp.Status = int32(daos.InvalidInput) + return + } + if !daos.LabelIsValid(req.Label) { + mod.log.Errorf("bad pool label %q", req.Label) + resp.Status = int32(daos.InvalidInput) + return + } + if len(req.Svcreps) == 0 { + mod.log.Errorf("pool %q has zero svcreps", req.Uuid) + resp.Status = int32(daos.InvalidInput) + return + } + + lock, err := mod.poolDB.TakePoolLock(parent, poolUUID) + if err != nil { + mod.log.Errorf("failed to take pool lock: %s", err) + resp.Status = int32(daos.MiscError) + return + } + defer lock.Release() + ctx := lock.InContext(parent) + + ps, err := mod.poolDB.FindPoolServiceByUUID(poolUUID) + if err == nil { + // We're updating an existing pool service. + if ps.PoolLabel != req.Label { + if _, err := mod.poolDB.FindPoolServiceByLabel(req.Label); err == nil { + mod.log.Errorf("pool with label %q already exists", req.Label) + resp.Status = int32(daos.Exists) + return + } + } + ps.PoolLabel = req.Label + ps.Replicas = ranklist.RanksFromUint32(req.Svcreps) + + mod.log.Debugf("updating pool service from req: %+v", req) + if err := mod.poolDB.UpdatePoolService(ctx, ps); err != nil { + mod.log.Errorf("failed to update pool: %s", err) + resp.Status = int32(daos.MiscError) + return + } + + return + } else if !system.IsPoolNotFound(err) { + mod.log.Errorf("failed to find pool: %s", err) + resp.Status = int32(daos.MiscError) + return + } + + if _, err := mod.poolDB.FindPoolServiceByLabel(req.Label); err == nil { + mod.log.Errorf("pool with label %q already exists", req.Label) + resp.Status = int32(daos.Exists) + return + } + + ps = &system.PoolService{ + PoolUUID: poolUUID, + PoolLabel: req.Label, + State: system.PoolServiceStateReady, + Replicas: ranklist.RanksFromUint32(req.Svcreps), + } + + mod.log.Debugf("adding pool service from req: %+v", req) + if err := mod.poolDB.AddPoolService(ctx, ps); err != nil { + mod.log.Errorf("failed to register pool: %s", err) + resp.Status = int32(daos.MiscError) + return + } + + return +} + +func (mod *srvModule) 
handleCheckerDeregisterPool(parent context.Context, reqb []byte) (out []byte, outErr error) { + req := new(srvpb.CheckDeregPoolReq) + if err := proto.Unmarshal(reqb, req); err != nil { + return nil, drpc.UnmarshalingPayloadFailure() + } + mod.log.Debugf("handling CheckerDeregisterPool: %+v", req) + + resp := new(srvpb.CheckDeregPoolResp) + defer func() { + mod.log.Debugf("CheckerDeregisterPool resp: %+v", resp) + out, outErr = proto.Marshal(resp) + }() + + poolUUID, err := uuid.Parse(req.Uuid) + if err != nil { + mod.log.Errorf("invalid pool UUID %q: %s", req.Uuid, err) + resp.Status = int32(daos.InvalidInput) + return + } + + lock, err := mod.poolDB.TakePoolLock(parent, poolUUID) + if err != nil { + mod.log.Errorf("failed to take pool lock: %s", err) + resp.Status = int32(daos.MiscError) + return + } + defer lock.Release() + ctx := lock.InContext(parent) + + if _, err := mod.poolDB.FindPoolServiceByUUID(poolUUID); err != nil { + if system.IsPoolNotFound(err) { + mod.log.Errorf("pool with uuid %q does not exist", req.Uuid) + resp.Status = int32(daos.Nonexistent) + } else { + mod.log.Errorf("failed to check pool uuid: %s", err) + resp.Status = int32(daos.MiscError) + } + return + } + + if err := mod.poolDB.RemovePoolService(ctx, poolUUID); err != nil { + mod.log.Errorf("failed to remove pool: %s", err) + resp.Status = int32(daos.MiscError) + return + } + + return +} + +func (mod *srvModule) handleCheckerReport(_ context.Context, reqb []byte) (out []byte, outErr error) { + req := new(srvpb.CheckReportReq) + if err := proto.Unmarshal(reqb, req); err != nil { + return nil, drpc.UnmarshalingPayloadFailure() + } + mod.log.Debugf("handling CheckerReport: %+v", req) + + resp := new(srvpb.CheckReportResp) + defer func() { + mod.log.Debugf("CheckerReport resp: %+v", resp) + out, outErr = proto.Marshal(resp) + }() + + if req.Report != nil && req.Report.PoolLabel == "" && req.Report.PoolUuid != "" { + poolUUID, err := uuid.Parse(req.Report.PoolUuid) + if err != nil { + 
mod.log.Errorf("invalid pool UUID %q: %s", req.Report.PoolUuid, err) + resp.Status = int32(daos.InvalidInput) + return + } + + if ps, err := mod.poolDB.FindPoolServiceByUUID(poolUUID); err == nil { + // Annotate the report with the pool label for the user. + // NB: In some cases this label may be incorrect, in which + // case the user will want to use the verbose or JSON output + // modes of the checker in order to get the UUID. + req.Report.PoolLabel = ps.PoolLabel + } + } + + finding := checker.AnnotateFinding(checker.NewFinding(req.Report)) + mod.log.Debugf("annotated finding: %+v", finding) + if err := mod.checkerDB.AddOrUpdateCheckerFinding(finding); err != nil { + mod.log.Errorf("failed to add checker finding %+v: %s", finding, err) + resp.Status = int32(daos.MiscError) + return + } + + return +} diff --git a/src/control/server/mgmt_drpc_checker_test.go b/src/control/server/mgmt_drpc_checker_test.go new file mode 100644 index 00000000000..e0e051a891a --- /dev/null +++ b/src/control/server/mgmt_drpc_checker_test.go @@ -0,0 +1,304 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package server + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + uuid "github.com/google/uuid" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/testing/protocmp" + + srvpb "github.com/daos-stack/daos/src/control/common/proto/srv" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/drpc" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" + "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/raft" +) + +func mockSrvModule(t *testing.T, log logging.Logger, ec int) *srvModule { + srv := &srvModule{ + log: log, + poolDB: raft.MockDatabase(t, log), + } + addEngineInstances(srv, ec, log) + + return srv +} + +func TestSrvModule_HandleCheckerListPools(t *testing.T) { + testPool := &system.PoolService{ + PoolUUID: uuid.New(), + PoolLabel: "test-pool", + Replicas: []ranklist.Rank{0, 1, 2}, + } + + for name, tc := range map[string]struct { + req []byte + notReplica bool + expResp *srvpb.CheckListPoolResp + expErr error + }{ + "bad payload": { + req: []byte{'b', 'a', 'd'}, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "not replica": { + notReplica: true, + expResp: &srvpb.CheckListPoolResp{Status: int32(daos.MiscError)}, + }, + "success": { + expResp: &srvpb.CheckListPoolResp{ + Pools: []*srvpb.CheckListPoolResp_OnePool{ + { + Uuid: testPool.PoolUUID.String(), + Label: testPool.PoolLabel, + Svcreps: ranklist.RanksToUint32(testPool.Replicas), + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + parent, cancel := context.WithCancel(context.Background()) + defer cancel() + + mod := mockSrvModule(t, log, 1) + if tc.notReplica { + mod.poolDB = raft.MockDatabaseWithCfg(t, log, 
&raft.DatabaseConfig{}) + } else { + lock, ctx := getPoolLockCtx(t, parent, mod.poolDB, testPool.PoolUUID) + if err := mod.poolDB.AddPoolService(ctx, testPool); err != nil { + t.Fatal(err) + } + lock.Release() + } + + gotMsg, gotErr := mod.handleCheckerListPools(parent, tc.req) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + gotResp := new(srvpb.CheckListPoolResp) + if err := proto.Unmarshal(gotMsg, gotResp); err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tc.expResp, gotResp, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected response (-want +got):\n%s", diff) + } + }) + } +} + +func TestSrvModule_HandleCheckerRegisterPool(t *testing.T) { + existingPool := &system.PoolService{ + PoolUUID: uuid.New(), + PoolLabel: "test-pool", + Replicas: []ranklist.Rank{0, 1, 2}, + } + otherPool := &system.PoolService{ + PoolUUID: uuid.New(), + PoolLabel: "test-pool2", + Replicas: []ranklist.Rank{0, 1, 2}, + } + makeReqBytes := func(id, label string, replicas []ranklist.Rank) []byte { + req := &srvpb.CheckRegPoolReq{ + Uuid: id, + Label: label, + Svcreps: ranklist.RanksToUint32(replicas), + } + b, err := proto.Marshal(req) + if err != nil { + t.Fatal(err) + } + return b + } + newUUID := uuid.New().String() + + for name, tc := range map[string]struct { + req []byte + notReplica bool + expResp *srvpb.CheckRegPoolResp + expErr error + }{ + "bad payload": { + req: []byte{'b', 'a', 'd'}, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "bad uuid": { + req: makeReqBytes("ow", "new", []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.InvalidInput)}, + }, + "bad label": { + req: makeReqBytes(newUUID, newUUID, []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.InvalidInput)}, + }, + "empty label": { + req: makeReqBytes(newUUID, "", []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.InvalidInput)}, + }, + "zero svcreps": { + req: makeReqBytes(newUUID, "new", 
[]ranklist.Rank{}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.InvalidInput)}, + }, + "not replica on update": { + req: makeReqBytes(existingPool.PoolUUID.String(), "new-label", []ranklist.Rank{1}), + notReplica: true, + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.MiscError)}, + }, + "not replica on add": { + req: makeReqBytes(newUUID, "new", []ranklist.Rank{0}), + notReplica: true, + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.MiscError)}, + }, + "duplicate label on update": { + req: makeReqBytes(existingPool.PoolUUID.String(), otherPool.PoolLabel, []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.Exists)}, + }, + "duplicate label on add": { + req: makeReqBytes(newUUID, existingPool.PoolLabel, []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{Status: int32(daos.Exists)}, + }, + "successful update": { + req: makeReqBytes(existingPool.PoolUUID.String(), "new-label", []ranklist.Rank{1}), + expResp: &srvpb.CheckRegPoolResp{}, + }, + "successful add": { + req: makeReqBytes(newUUID, "new", []ranklist.Rank{0}), + expResp: &srvpb.CheckRegPoolResp{}, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + parent, cancel := context.WithCancel(context.Background()) + defer cancel() + + mod := mockSrvModule(t, log, 1) + if tc.notReplica { + mod.poolDB = raft.MockDatabaseWithCfg(t, log, &raft.DatabaseConfig{}) + } else { + lock, ctx := getPoolLockCtx(t, parent, mod.poolDB, existingPool.PoolUUID) + if err := mod.poolDB.AddPoolService(ctx, existingPool); err != nil { + t.Fatal(err) + } + lock.Release() + lock, ctx = getPoolLockCtx(t, parent, mod.poolDB, otherPool.PoolUUID) + if err := mod.poolDB.AddPoolService(ctx, otherPool); err != nil { + t.Fatal(err) + } + lock.Release() + } + + gotMsg, gotErr := mod.handleCheckerRegisterPool(parent, tc.req) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + gotResp := 
new(srvpb.CheckRegPoolResp) + if err := proto.Unmarshal(gotMsg, gotResp); err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tc.expResp, gotResp, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected response (-want +got):\n%s", diff) + } + }) + } +} + +func TestSrvModule_HandleCheckerDeregisterPool(t *testing.T) { + existingPool := &system.PoolService{ + PoolUUID: uuid.New(), + PoolLabel: "test-pool", + Replicas: []ranklist.Rank{0, 1, 2}, + } + makeReqBytes := func(id string) []byte { + req := &srvpb.CheckDeregPoolReq{ + Uuid: id, + } + b, err := proto.Marshal(req) + if err != nil { + t.Fatal(err) + } + return b + } + unkUUID := uuid.New().String() + + for name, tc := range map[string]struct { + req []byte + notReplica bool + expResp *srvpb.CheckDeregPoolResp + expErr error + }{ + "bad payload": { + req: []byte{'b', 'a', 'd'}, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "not replica": { + req: makeReqBytes(existingPool.PoolUUID.String()), + notReplica: true, + expResp: &srvpb.CheckDeregPoolResp{Status: int32(daos.MiscError)}, + }, + "bad uuid": { + req: makeReqBytes("ow"), + expResp: &srvpb.CheckDeregPoolResp{Status: int32(daos.InvalidInput)}, + }, + "unknown uuid": { + req: makeReqBytes(unkUUID), + expResp: &srvpb.CheckDeregPoolResp{Status: int32(daos.Nonexistent)}, + }, + "success": { + req: makeReqBytes(existingPool.PoolUUID.String()), + expResp: &srvpb.CheckDeregPoolResp{}, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + parent, cancel := context.WithCancel(context.Background()) + defer cancel() + + mod := mockSrvModule(t, log, 1) + if tc.notReplica { + mod.poolDB = raft.MockDatabaseWithCfg(t, log, &raft.DatabaseConfig{}) + } else { + lock, ctx := getPoolLockCtx(t, parent, mod.poolDB, existingPool.PoolUUID) + if err := mod.poolDB.AddPoolService(ctx, existingPool); err != nil { + t.Fatal(err) + } + lock.Release() + } + + ctx := context.Background() + gotMsg, 
gotErr := mod.handleCheckerDeregisterPool(ctx, tc.req) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + gotResp := new(srvpb.CheckDeregPoolResp) + if err := proto.Unmarshal(gotMsg, gotResp); err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tc.expResp, gotResp, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected response (-want +got):\n%s", diff) + } + }) + } +} diff --git a/src/control/server/mgmt_drpc_test.go b/src/control/server/mgmt_drpc_test.go index cb8ec5c61d6..0c8f0808aa7 100644 --- a/src/control/server/mgmt_drpc_test.go +++ b/src/control/server/mgmt_drpc_test.go @@ -291,8 +291,8 @@ func TestSrvModule_handleGetPoolServiceRanks(t *testing.T) { db := raft.MockDatabase(t, log) mod := &srvModule{ - log: log, - sysdb: db, + log: log, + poolDB: db, } if tc.testPool != nil { lock, err := db.TakePoolLock(ctx, tc.testPool.PoolUUID) @@ -375,8 +375,8 @@ func TestSrvModule_handlePoolFindByLabel(t *testing.T) { db := raft.MockDatabase(t, log) mod := &srvModule{ - log: log, - sysdb: db, + log: log, + poolDB: db, } if tc.testPool != nil { lock, err := db.TakePoolLock(ctx, tc.testPool.PoolUUID) diff --git a/src/control/server/mgmt_fi.go b/src/control/server/mgmt_fi.go new file mode 100644 index 00000000000..8a32e0ca7eb --- /dev/null +++ b/src/control/server/mgmt_fi.go @@ -0,0 +1,189 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build fault_injection +// +build fault_injection + +package server + +import ( + "context" + + "github.com/pkg/errors" + "google.golang.org/protobuf/proto" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/drpc" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" +) + +func (svc *mgmtSvc) FaultInjectReport(ctx context.Context, rpt *chkpb.CheckReport) (*mgmtpb.DaosResp, error) { + if err := svc.checkLeaderRequest(rpt); err != nil { + return nil, err + } + + cf := checker.NewFinding(rpt) + if err := svc.sysdb.AddCheckerFinding(cf); err != nil { + return nil, err + } + + return new(mgmtpb.DaosResp), nil +} + +func (svc *mgmtSvc) FaultInjectMgmtPoolFault(parent context.Context, fault *chkpb.Fault) (*mgmtpb.DaosResp, error) { + if err := svc.checkLeaderRequest(fault); err != nil { + return nil, err + } + + var poolID string + var newLabel string + switch len(fault.Strings) { + case 1: + poolID = fault.Strings[0] + case 2: + poolID = fault.Strings[0] + newLabel = fault.Strings[1] + default: + return nil, errors.New("no pool UUID provided") + } + + ps, err := svc.getPoolService(poolID) + if err != nil { + return nil, err + } + if newLabel == "" { + newLabel = ps.PoolLabel + "-fault" + } + + lock, err := svc.sysdb.TakePoolLock(parent, ps.PoolUUID) + if err != nil { + return nil, err + } + defer lock.Release() + ctx := lock.InContext(parent) + + var newRanks []ranklist.Rank + switch len(fault.Uints) { + case 0: + if len(ps.Replicas) == 0 { + newRanks = []ranklist.Rank{0, 3, 6, 9} + } else { + newRanks = []ranklist.Rank{ps.Replicas[0]} + } + default: + newRanks = ranklist.RanksFromUint32(fault.Uints) + } + + switch fault.Class 
{ + case chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL: + ps.PoolLabel = newLabel + case chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL: + ps.Replicas = newRanks + case chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS: + if err := svc.sysdb.RemovePoolService(ctx, ps.PoolUUID); err != nil { + return nil, err + } + ps = nil + default: + return nil, errors.Errorf("unhandled fault class %q", fault.Class) + } + + if ps != nil { + if err := svc.sysdb.UpdatePoolService(ctx, ps); err != nil { + return nil, err + } + } + return new(mgmtpb.DaosResp), nil +} + +func (svc *mgmtSvc) FaultInjectPoolFault(parent context.Context, fault *chkpb.Fault) (*mgmtpb.DaosResp, error) { + if err := svc.checkLeaderRequest(fault); err != nil { + return nil, err + } + + var poolID string + var newLabel string + switch len(fault.Strings) { + case 1: + poolID = fault.Strings[0] + case 2: + poolID = fault.Strings[0] + newLabel = fault.Strings[1] + default: + return nil, errors.New("no pool UUID provided") + } + + ps, err := svc.getPoolService(poolID) + if err != nil { + return nil, err + } + if newLabel == "" { + newLabel = ps.PoolLabel + "-fault" + } + + lock, err := svc.sysdb.TakePoolLock(parent, ps.PoolUUID) + if err != nil { + return nil, err + } + defer lock.Release() + ctx := lock.InContext(parent) + + resp := new(mgmtpb.DaosResp) + switch fault.Class { + case chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL: + prop := &mgmtpb.PoolProperty{ + Number: uint32(daos.PoolPropertyLabel), + Value: &mgmtpb.PoolProperty_Strval{Strval: newLabel}, + } + req := &mgmtpb.PoolSetPropReq{ + Id: poolID, + Properties: []*mgmtpb.PoolProperty{prop}, + } + + var dresp *drpc.Response + dresp, err = svc.makePoolServiceCall(ctx, drpc.MethodPoolSetProp, req) + if err != nil { + return nil, err + } + + if err = proto.Unmarshal(dresp.Body, resp); err != nil { + return nil, errors.Wrap(err, "unmarshal PoolSetProp response") + } + + if resp.GetStatus() != 0 { + return nil, errors.Errorf("label update failed: %s", 
drpc.Status(resp.Status)) + } + case chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE: + allRanks, err := svc.sysdb.MemberRanks(system.NonExcludedMemberFilter) + if err != nil { + return nil, err + } + req := &mgmtpb.PoolDestroyReq{ + Id: ps.PoolUUID.String(), + SvcRanks: ranklist.RanksToUint32(allRanks), + Force: true, + } + dresp, err := svc.harness.CallDrpc(ctx, drpc.MethodPoolDestroy, req) + if err != nil { + return nil, err + } + + if err = proto.Unmarshal(dresp.Body, resp); err != nil { + return nil, errors.Wrap(err, "unmarshal PoolDestroy response") + } + + if resp.GetStatus() != 0 { + return nil, errors.Errorf("pool destroy failed: %s", drpc.Status(resp.Status)) + } + default: + return nil, errors.Errorf("unhandled fault class %q", fault.Class) + } + + return resp, nil +} diff --git a/src/control/server/mgmt_pool.go b/src/control/server/mgmt_pool.go index a432bb040a7..5c4f3c88349 100644 --- a/src/control/server/mgmt_pool.go +++ b/src/control/server/mgmt_pool.go @@ -1190,7 +1190,7 @@ func (svc *mgmtSvc) PoolDeleteACL(ctx context.Context, req *mgmtpb.DeleteACLReq) // ListPools returns a set of all pools in the system. 
func (svc *mgmtSvc) ListPools(ctx context.Context, req *mgmtpb.ListPoolsReq) (*mgmtpb.ListPoolsResp, error) { - if err := svc.checkReplicaRequest(req); err != nil { + if err := svc.checkReplicaRequest(wrapCheckerReq(req)); err != nil { return nil, err } diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go index 325184e57d7..d7d255cbf49 100644 --- a/src/control/server/mgmt_pool_test.go +++ b/src/control/server/mgmt_pool_test.go @@ -34,7 +34,7 @@ import ( "github.com/daos-stack/daos/src/control/system/raft" ) -func getPoolLockCtx(t *testing.T, parent context.Context, sysdb *raft.Database, poolUUID uuid.UUID) (*raft.PoolLock, context.Context) { +func getPoolLockCtx(t *testing.T, parent context.Context, sysdb poolDatabase, poolUUID uuid.UUID) (*raft.PoolLock, context.Context) { t.Helper() if parent == nil { diff --git a/src/control/server/mgmt_svc.go b/src/control/server/mgmt_svc.go index 97bb6736e44..35792c44249 100644 --- a/src/control/server/mgmt_svc.go +++ b/src/control/server/mgmt_svc.go @@ -127,7 +127,12 @@ func (svc *mgmtSvc) checkSystemRequest(req proto.Message) error { // checkLeaderRequest performs sanity-checking on a request that must // be run on the current MS leader. func (svc *mgmtSvc) checkLeaderRequest(req proto.Message) error { - if err := svc.checkSystemRequest(req); err != nil { + unwrapped, err := svc.unwrapCheckerReq(req) + if err != nil { + return err + } + + if err := svc.checkSystemRequest(unwrapped); err != nil { return err } return svc.sysdb.CheckLeader() @@ -136,7 +141,12 @@ func (svc *mgmtSvc) checkLeaderRequest(req proto.Message) error { // checkReplicaRequest performs sanity-checking on a request that must // be run on a MS replica. 
func (svc *mgmtSvc) checkReplicaRequest(req proto.Message) error { - if err := svc.checkSystemRequest(req); err != nil { + unwrapped, err := svc.unwrapCheckerReq(req) + if err != nil { + return err + } + + if err := svc.checkSystemRequest(unwrapped); err != nil { return err } return svc.sysdb.CheckReplica() diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index e53ba364fbd..1ab741438b2 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -36,6 +36,7 @@ import ( "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" "github.com/daos-stack/daos/src/control/system/raft" ) @@ -189,6 +190,7 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net SecondaryFabricContexts: req.SecondaryNctxs, FaultDomain: fd, Incarnation: req.Incarnation, + CheckMode: req.CheckMode, }) if err != nil { return nil, errors.Wrap(err, "failed to join system") @@ -203,8 +205,12 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net member.Rank, member.PrimaryFabricURI, member.SecondaryFabricURIs, joinResponse.PrevState, member.State) } + joinState := mgmtpb.JoinResp_IN + if svc.checkerIsEnabled() { + joinState = mgmtpb.JoinResp_CHECK + } resp := &mgmtpb.JoinResp{ - State: mgmtpb.JoinResp_IN, + State: joinState, Rank: member.Rank.Uint32(), MapVersion: joinResponse.MapVersion, } @@ -396,7 +402,7 @@ func (svc *mgmtSvc) doGroupUpdate(ctx context.Context, forced bool) error { // with listening port from joining instance's host addr contained in the // provided request. 
func (svc *mgmtSvc) Join(ctx context.Context, req *mgmtpb.JoinReq) (resp *mgmtpb.JoinResp, err error) { - if err := svc.checkLeaderRequest(req); err != nil { + if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil { return nil, err } @@ -417,6 +423,7 @@ type ( Ranks *ranklist.RankSet Force bool FullSystem bool + CheckMode bool } fanoutResponse struct { @@ -530,7 +537,9 @@ func (svc *mgmtSvc) rpcFanout(ctx context.Context, req *fanoutRequest, resp *fan } ranksReq := &control.RanksReq{ - Ranks: req.Ranks.String(), Force: req.Force, + Ranks: req.Ranks.String(), + Force: req.Force, + CheckMode: req.CheckMode, } funcName := func(i interface{}) string { @@ -601,7 +610,7 @@ func (svc *mgmtSvc) rpcFanout(ctx context.Context, req *fanoutRequest, resp *fan // same name in lib/control/system.go and returns results from all selected // ranks. func (svc *mgmtSvc) SystemQuery(ctx context.Context, req *mgmtpb.SystemQueryReq) (*mgmtpb.SystemQueryResp, error) { - if err := svc.checkReplicaRequest(req); err != nil { + if err := svc.checkReplicaRequest(wrapCheckerReq(req)); err != nil { return nil, err } @@ -725,7 +734,7 @@ func (svc *mgmtSvc) getFanout(req systemReq) (*fanoutRequest, *fanoutResponse, e // This control service method is triggered from the control API method of the // same name in lib/control/system.go and returns results from all selected ranks. 
func (svc *mgmtSvc) SystemStop(ctx context.Context, req *mgmtpb.SystemStopReq) (*mgmtpb.SystemStopResp, error) { - if err := svc.checkLeaderRequest(req); err != nil { + if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil { return nil, err } svc.log.Debug("Received SystemStop RPC") @@ -792,6 +801,41 @@ func processStartResp(fr *fanoutResponse, publisher events.Publisher) (*mgmtpb.S return sr, nil } +func (svc *mgmtSvc) checkMemberStates(requiredStates ...system.MemberState) error { + var stateMask system.MemberState + for _, state := range requiredStates { + stateMask |= state + } + + allMembers, err := svc.sysdb.AllMembers() + if err != nil { + return err + } + invalidMembers := &ranklist.RankSet{} + + svc.log.Tracef("checking %d members", len(allMembers)) + for _, m := range allMembers { + svc.log.Tracef("member %d: %s", m.Rank.Uint32(), m.State) + if m.State&stateMask == 0 { + invalidMembers.Add(m.Rank) + } + } + + stopRequired := false + if stateMask&system.MemberStateStopped != 0 { + stopRequired = true + } + if invalidMembers.Count() > 0 { + states := make([]string, len(requiredStates)) + for i, state := range requiredStates { + states[i] = state.String() + } + return checker.FaultIncorrectMemberStates(stopRequired, invalidMembers.String(), strings.Join(states, "|")) + } + + return nil +} + // SystemStart implements the method defined for the Management Service. // // Initiate controlled start of DAOS system instances (system members) @@ -801,7 +845,7 @@ func processStartResp(fr *fanoutResponse, publisher events.Publisher) (*mgmtpb.S // This control service method is triggered from the control API method of the // same name in lib/control/system.go and returns results from all selected ranks. 
func (svc *mgmtSvc) SystemStart(ctx context.Context, req *mgmtpb.SystemStartReq) (*mgmtpb.SystemStartResp, error) { - if err := svc.checkLeaderRequest(req); err != nil { + if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil { return nil, err } svc.log.Debug("Received SystemStart RPC") @@ -811,6 +855,7 @@ func (svc *mgmtSvc) SystemStart(ctx context.Context, req *mgmtpb.SystemStartReq) return nil, err } + fReq.CheckMode = req.CheckMode fReq.Method = control.StartRanks fResp, _, err = svc.rpcFanout(ctx, fReq, fResp, true) if err != nil { @@ -827,7 +872,7 @@ func (svc *mgmtSvc) SystemStart(ctx context.Context, req *mgmtpb.SystemStartReq) // SystemExclude marks the specified ranks as administratively excluded from the system. func (svc *mgmtSvc) SystemExclude(ctx context.Context, req *mgmtpb.SystemExcludeReq) (*mgmtpb.SystemExcludeResp, error) { - if err := svc.checkLeaderRequest(req); err != nil { + if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil { return nil, err } @@ -877,7 +922,7 @@ func (svc *mgmtSvc) SystemExclude(ctx context.Context, req *mgmtpb.SystemExclude // from control-plane instances attempting to notify the MS of a cluster event // in the DAOS system (this handler should only get called on the MS leader). func (svc *mgmtSvc) ClusterEvent(ctx context.Context, req *sharedpb.ClusterEventReq) (*sharedpb.ClusterEventResp, error) { - if err := svc.checkLeaderRequest(req); err != nil { + if err := svc.checkLeaderRequest(wrapCheckerReq(req)); err != nil { return nil, err } @@ -1169,7 +1214,7 @@ func (svc *mgmtSvc) updatePoolPropsWithSysProps(ctx context.Context, systemPrope // SystemGetProp gets user-visible system properties. 
func (svc *mgmtSvc) SystemGetProp(ctx context.Context, req *mgmtpb.SystemGetPropReq) (resp *mgmtpb.SystemGetPropResp, err error) {
-	if err := svc.checkReplicaRequest(req); err != nil {
+	if err := svc.checkReplicaRequest(wrapCheckerReq(req)); err != nil {
 		return nil, err
 	}
 
diff --git a/src/control/system/checker/checker.go b/src/control/system/checker/checker.go
new file mode 100644
index 00000000000..eacce708d77
--- /dev/null
+++ b/src/control/system/checker/checker.go
@@ -0,0 +1,18 @@
+//
+// (C) Copyright 2022 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package checker
+
+// FindingStore lists the operations used to add, update and look up checker
+// Findings; individual findings are addressed by their sequence number (seq).
+type FindingStore interface {
+	AddCheckerFinding(finding *Finding) error
+	UpdateCheckerFinding(finding *Finding) error
+	AddOrUpdateCheckerFinding(finding *Finding) error
+	GetCheckerFindings(seqs ...uint64) ([]*Finding, error)
+	GetCheckerFinding(seq uint64) (*Finding, error)
+	SetCheckerFindingAction(seq uint64, action int32) error
+}
diff --git a/src/control/system/checker/errors.go b/src/control/system/checker/errors.go
new file mode 100644
index 00000000000..6d23148c5ee
--- /dev/null
+++ b/src/control/system/checker/errors.go
@@ -0,0 +1,18 @@
+//
+// (C) Copyright 2022 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package checker
+
+import "github.com/pkg/errors"
+
+// Sentinel errors exported by the checker package.
+var (
+	ErrNoMorePasses = errors.New("no more passes")
+	ErrPassFindings = errors.New("pass has findings that must be addressed")
+)
+
+// IsPassFindings reports whether the underlying cause of err is ErrPassFindings.
+func IsPassFindings(err error) bool {
+	cause := errors.Cause(err)
+	return cause == ErrPassFindings
+}
diff --git a/src/control/system/checker/faults.go b/src/control/system/checker/faults.go
new file mode 100644
index 00000000000..7f58df9ae5e
--- /dev/null
+++ b/src/control/system/checker/faults.go
@@ -0,0 +1,52 @@
+//
+// (C) Copyright 2022 Intel Corporation.
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package checker + +import ( + "fmt" + + "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/fault/code" +) + +func IsIncorrectMemberStates(err error) bool { + return fault.IsFaultCode(err, code.SystemCheckerInvalidMemberStates) +} + +var ( + FaultCheckerNotEnabled = checkerFault( + code.SystemCheckerNotEnabled, + "system checker is not enabled", + "enable the system checker and try again", + ) + FaultCheckerEnabled = checkerFault( + code.SystemCheckerEnabled, + "system checker is enabled; normal operations are disabled", + "disable the system checker to enable normal operations", + ) +) + +func FaultIncorrectMemberStates(stopRequired bool, members, expectedStates string) *fault.Fault { + remedy := "enable checker mode" + if stopRequired { + remedy = "stop system before enabling checker mode" + } + return checkerFault( + code.SystemCheckerInvalidMemberStates, + "members not in expected states ("+expectedStates+"): "+members, + fmt.Sprintf("%s and/or administratively exclude members as appropriate", remedy), + ) +} + +func checkerFault(code code.Code, desc, res string) *fault.Fault { + return &fault.Fault{ + Domain: "checker", + Code: code, + Description: desc, + Resolution: res, + } +} diff --git a/src/control/system/checker/finding.go b/src/control/system/checker/finding.go new file mode 100644 index 00000000000..770dfcd7132 --- /dev/null +++ b/src/control/system/checker/finding.go @@ -0,0 +1,210 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package checker + +import ( + "fmt" + "strconv" + "strings" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" +) + +type ( + Finding struct { + chkpb.CheckReport + } + + reportObject uint +) + +const ( + unkObj reportObject = iota + poolObj + contObj + engObj + otherObj +) + +func (ro reportObject) String() string { + return map[reportObject]string{ + unkObj: "unknown", + poolObj: "pool", + contObj: "container", + engObj: "target", + otherObj: "other", + }[ro] +} + +func (f *Finding) HasChoice(action chkpb.CheckInconsistAction) bool { + for _, a := range f.ActChoices { + if a == action { + return true + } + } + return false +} + +func (f *Finding) ValidChoicesString() string { + if len(f.ActChoices) == 0 { + return "no valid action choices (already repaired?)" + } + + var actions []string + for _, a := range f.ActChoices { + actions = append(actions, strconv.Itoa(int(a))) + } + return strings.Join(actions, ",") +} + +func NewFinding(report *chkpb.CheckReport) *Finding { + if report == nil { + return nil + } + + f := new(Finding) + proto.Merge(&f.CheckReport, report) + return f +} + +// descAction attempts to generate a human-readable description of the +// action that may be taken for the given finding. 
+func descAction(class chkpb.CheckInconsistClass, action chkpb.CheckInconsistAction, details ...string) string {
+	// Classify the inconsistency by the enum range its class falls into;
+	// anything outside the pool/container/engine ranges is "other".
+	ro := otherObj
+	switch {
+	case chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITH_QUORUM <= class && class <= chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL:
+		ro = poolObj
+	case chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS <= class && class <= chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL:
+		ro = contObj
+	case chkpb.CheckInconsistClass_CIC_ENGINE_NONEXIST_IN_MAP <= class && class <= chkpb.CheckInconsistClass_CIC_ENGINE_HAS_NO_STORAGE:
+		ro = engObj
+	}
+
+	// detail returns the detail at position pos, or an empty string
+	// when the caller supplied fewer details than that.
+	detail := func(pos int) string {
+		if pos < len(details) {
+			return details[pos]
+		}
+		return ""
+	}
+
+	switch action {
+	case chkpb.CheckInconsistAction_CIA_IGNORE:
+		return fmt.Sprintf("Ignore the %s finding", ro)
+	case chkpb.CheckInconsistAction_CIA_DISCARD:
+		return fmt.Sprintf("Discard the %s", ro)
+	case chkpb.CheckInconsistAction_CIA_READD:
+		return fmt.Sprintf("Re-add the %s", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_MS:
+		if ro != poolObj {
+			return fmt.Sprintf("Trust the MS %s entry", ro)
+		}
+		if class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS {
+			return fmt.Sprintf("Reclaim the orphaned pool storage for %s", detail(0))
+		}
+		if class == chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL {
+			return fmt.Sprintf("Reset the pool property using the MS label for %s", detail(0))
+		}
+		return fmt.Sprintf("Trust the MS pool entry for %s", detail(0))
+	case chkpb.CheckInconsistAction_CIA_TRUST_PS:
+		if ro == poolObj {
+			if class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS {
+				return fmt.Sprintf("Recreate the MS pool entry for %s", detail(0))
+			}
+			if class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE {
+				return fmt.Sprintf("Remove the MS pool entry for %s", detail(0))
+			}
+			if class == chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL {
+				return fmt.Sprintf("Update the MS label to use the pool property value for %s", detail(0))
+			}
+		} else if ro == contObj && class == chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL {
+			return fmt.Sprintf("Reset the container property using the PS label for %s", detail(1))
+		}
+		// No class-specific wording applies: fall back to the generic text.
+		return fmt.Sprintf("Trust the PS %s entry", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_TARGET:
+		if ro == contObj && class == chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL {
+			return fmt.Sprintf("Update the CS label to use the container property value for %s", detail(1))
+		}
+		return fmt.Sprintf("Trust the %s result", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_MAJORITY:
+		return fmt.Sprintf("Trust the majority of the %s results", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_LATEST:
+		return fmt.Sprintf("Trust the latest %s result", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_OLDEST:
+		return fmt.Sprintf("Trust the oldest %s result", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_EC_PARITY:
+		return fmt.Sprintf("Trust the parity of the %s results", ro)
+	case chkpb.CheckInconsistAction_CIA_TRUST_EC_DATA:
+		return fmt.Sprintf("Trust the data of the %s results", ro)
+	default:
+		return fmt.Sprintf("%s: %s (details: %+v)", ro, action, details)
+	}
+}
+
+// Trim leading/trailing whitespace from all string fields in the
+// checker report.
+func trimProtoSpaces(pm proto.Message) { + pr := pm.ProtoReflect() + pr.Range(func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool { + if fd.Kind() == protoreflect.StringKind { + if fd.IsList() { + for i := 0; i < v.List().Len(); i++ { + v.List().Set(i, protoreflect.ValueOf(strings.TrimSpace(v.List().Get(i).String()))) + } + } else { + pr.Set(fd, protoreflect.ValueOf(strings.TrimSpace(v.String()))) + } + } + return true + }) +} + +func AnnotateFinding(f *Finding) *Finding { + if f == nil { + return nil + } + + trimProtoSpaces(f) + + // Pad out the list of details as necessary to match + // the length of the action list. + if len(f.ActChoices) > 0 && len(f.ActDetails) != len(f.ActChoices) { + for i := len(f.ActDetails); i < len(f.ActChoices); i++ { + f.ActDetails = append(f.ActDetails, "") + } + } + + // If the report does not specify a list of informative messages to + // accompany the list of actions, then create one. + if len(f.ActMsgs) == 0 { + if len(f.ActChoices) > 0 { + f.ActMsgs = make([]string, len(f.ActChoices)) + for i, act := range f.ActChoices { + f.ActMsgs[i] = descAction(f.Class, act, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) + } + } else { + f.ActMsgs = make([]string, 1) + f.ActMsgs[0] = descAction(f.Class, f.Action, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) + } + } + if len(f.Msg) == 0 { + f.Msg = fmt.Sprintf("Inconsistency found: %s (details: %+v)", f.Class, f.ActDetails) + } + + return f +} diff --git a/src/control/system/checker/finding_test.go b/src/control/system/checker/finding_test.go new file mode 100644 index 00000000000..3517ce33315 --- /dev/null +++ b/src/control/system/checker/finding_test.go @@ -0,0 +1,455 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package checker_test + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "google.golang.org/protobuf/testing/protocmp" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/system/checker" +) + +func TestChecker_AnnotateFinding(t *testing.T) { + for name, tc := range map[string]struct { + rpt *chkpb.CheckReport + expFinding *checker.Finding + }{ + "nil report": {}, + // Ideally each report should have a list of details to match the + // list of actions. However, the list of details is not required + // and we want to verify that the code handles this correctly. + "pad details to match actions": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: test.MockUUID(), + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{"ms-label", "ps-label"}, + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: test.MockUUID(), + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{"ms-label", "ps-label", ""}, + Msg: "Inconsistency found: CIC_POOL_BAD_LABEL (details: [ms-label ps-label ])", + ActMsgs: []string{ + fmt.Sprintf("Reset the pool property using the MS label for %s", test.MockUUID()), + fmt.Sprintf("Update the MS label to use the pool property value for %s", test.MockUUID()), + "Ignore the pool finding", + }, + }), + }, + "action report: 
orphaned pool restored to MS": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "28b265f6-a70b-412f-9559-8b6df06b7f7f", + Msg: "Check leader detects orphan pool\n", + Timestamp: "Mon Dec 5 19:25:39 2022\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "28b265f6-a70b-412f-9559-8b6df06b7f7f", + Msg: "Check leader detects orphan pool", + Timestamp: "Mon Dec 5 19:25:39 2022", + ActMsgs: []string{ + "Recreate the MS pool entry for 28b265f6-a70b-412f-9559-8b6df06b7f7f", + }, + }), + }, + "action report: orphaned pool removed on engines": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + PoolUuid: "28b265f6-a70b-412f-9559-8b6df06b7f7f", + Msg: "Check leader detects orphan pool\n", + Timestamp: "Mon Dec 5 19:25:39 2022\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + PoolUuid: "28b265f6-a70b-412f-9559-8b6df06b7f7f", + Msg: "Check leader detects orphan pool", + Timestamp: "Mon Dec 5 19:25:39 2022", + ActMsgs: []string{ + "Reclaim the orphaned pool storage for 28b265f6-a70b-412f-9559-8b6df06b7f7f", + }, + }), + }, + "action report: dangling pool removed from MS": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "40da737d-e47f-0000-ffff-ffff00000000", + Timestamp: "Mon Dec 5 19:25:39 2022\n", + Msg: "Check leader detects dangling pool\n", + }, + expFinding: checker.NewFinding( + 
&chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "40da737d-e47f-0000-ffff-ffff00000000", + Timestamp: "Mon Dec 5 19:25:39 2022", + Msg: "Check leader detects dangling pool", + ActMsgs: []string{ + "Remove the MS pool entry for 40da737d-e47f-0000-ffff-ffff00000000", + }, + }), + }, + "action report: corrupt pool label (fixed on PS)": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + PoolUuid: "6a8bbf20-fb86-416d-8045-c23fbce5048a", + Timestamp: "Mon Dec 5 19:25:52 2022\n", + Msg: "Check engine detects corrupted pool label: one-fault (MS) vs one (PS).\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_MS, + PoolUuid: "6a8bbf20-fb86-416d-8045-c23fbce5048a", + Timestamp: "Mon Dec 5 19:25:52 2022", + Msg: "Check engine detects corrupted pool label: one-fault (MS) vs one (PS).", + ActMsgs: []string{ + "Reset the pool property using the MS label for 6a8bbf20-fb86-416d-8045-c23fbce5048a", + }, + }), + }, + "action report: corrupt pool label (fixed on MS)": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "6a8bbf20-fb86-416d-8045-c23fbce5048a", + Timestamp: "Mon Dec 5 19:25:52 2022\n", + Msg: "Check engine detects corrupted pool label: one-fault (MS) vs one (PS).\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + PoolUuid: "6a8bbf20-fb86-416d-8045-c23fbce5048a", + Timestamp: "Mon Dec 5 19:25:52 2022", + Msg: "Check engine detects 
corrupted pool label: one-fault (MS) vs one (PS).", + ActMsgs: []string{ + "Update the MS label to use the pool property value for 6a8bbf20-fb86-416d-8045-c23fbce5048a", + }, + }), + }, + "action report: container non-exist on PS (discard)": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_DISCARD, + Rank: 1, + PoolUuid: "04077e64-085c-489f-bd6c-bc895710414f", + ContUuid: "44b03d6a-b7e0-431b-818b-24c6cf331181", + Timestamp: "Mon Dec 5 19:25:48 2022\n", + Msg: "Check engine detects orphan container 04077e64-085c-489f-bd6c-bc895710414f/44b03d6a-b7e0-431b-818b-24c6cf331181\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694340, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_DISCARD, + Rank: 1, + PoolUuid: "04077e64-085c-489f-bd6c-bc895710414f", + ContUuid: "44b03d6a-b7e0-431b-818b-24c6cf331181", + Timestamp: "Mon Dec 5 19:25:48 2022", + Msg: "Check engine detects orphan container 04077e64-085c-489f-bd6c-bc895710414f/44b03d6a-b7e0-431b-818b-24c6cf331181", + ActMsgs: []string{ + "Discard the container", + }, + }), + }, + "action report: container label mismatch (fixed on targets)": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694341, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + Rank: 1, + PoolUuid: "d48a9aa7-4341-446a-8125-bb7eab3781b3", + ContUuid: "00c841c8-4e01-4001-bd58-7de5ad602926", + Timestamp: "Mon Dec 5 19:25:49 2022\n", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs six-cont (property).\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694341, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + Rank: 1, + PoolUuid: "d48a9aa7-4341-446a-8125-bb7eab3781b3", + ContUuid: 
"00c841c8-4e01-4001-bd58-7de5ad602926", + Timestamp: "Mon Dec 5 19:25:49 2022", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs six-cont (property).", + ActMsgs: []string{ + "Reset the container property using the PS label for 00c841c8-4e01-4001-bd58-7de5ad602926", + }, + }), + }, + "action report: container label mismatch (fixed on PS)": { + rpt: &chkpb.CheckReport{ + Seq: 972946141031694341, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_TARGET, + Rank: 1, + PoolUuid: "d48a9aa7-4341-446a-8125-bb7eab3781b3", + ContUuid: "00c841c8-4e01-4001-bd58-7de5ad602926", + Timestamp: "Mon Dec 5 19:25:49 2022\n", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs six-cont (property).\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972946141031694341, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_TRUST_TARGET, + Rank: 1, + PoolUuid: "d48a9aa7-4341-446a-8125-bb7eab3781b3", + ContUuid: "00c841c8-4e01-4001-bd58-7de5ad602926", + Timestamp: "Mon Dec 5 19:25:49 2022", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs six-cont (property).", + ActMsgs: []string{ + "Update the CS label to use the container property value for 00c841c8-4e01-4001-bd58-7de5ad602926", + }, + }), + }, + "interactive finding: orphaned MS entry for pool": { + rpt: &chkpb.CheckReport{ + Seq: 973024752426024961, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "e0f1371c-087f-0000-ffff-ffff00000000", + Timestamp: "Mon Dec 5 20:47:31 2022\n", + Msg: "Check leader detects dangling pool.\n", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_DISCARD, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Discard the dangling pool entry from MS [suggested].", + "Keep the dangling pool entry 
on MS, repair nothing.", + }, + ActMsgs: []string{ + "Discard the unrecognized element: pool service, pool itself, container, and so on.", + "Ignore but log the inconsistency.", + }, + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 973024752426024961, + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "e0f1371c-087f-0000-ffff-ffff00000000", + Timestamp: "Mon Dec 5 20:47:31 2022", + Msg: "Check leader detects dangling pool.", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_DISCARD, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Discard the dangling pool entry from MS [suggested].", + "Keep the dangling pool entry on MS, repair nothing.", + }, + ActMsgs: []string{ + "Discard the unrecognized element: pool service, pool itself, container, and so on.", + "Ignore but log the inconsistency.", + }, + }), + }, + "interactive finding: pool label mismatch": { + rpt: &chkpb.CheckReport{ + Seq: 973024752426024962, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "dfbb7b07-061f-46b2-a11c-935af3fb7169", + Timestamp: "Mon Dec 5 20:47:31 2022\n", + Msg: "Check leader detects corrupted pool label: one-fault (MS) vs one (PS).\n", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Inconsistent pool label: one-fault (MS) vs one (PS), Trust MS pool label [suggested]", + "Trust PS pool label.", + "Keep the inconsistent pool label, repair nothing.", + }, + ActMsgs: []string{ + "Trust the information recorded in MS DB.", + "Trust the information recorded in PS DB.", + "Ignore but log the inconsistency.", + }, + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 973024752426024962, + Class: 
chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "dfbb7b07-061f-46b2-a11c-935af3fb7169", + Timestamp: "Mon Dec 5 20:47:31 2022", + Msg: "Check leader detects corrupted pool label: one-fault (MS) vs one (PS).", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_MS, + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Inconsistent pool label: one-fault (MS) vs one (PS), Trust MS pool label [suggested]", + "Trust PS pool label.", + "Keep the inconsistent pool label, repair nothing.", + }, + ActMsgs: []string{ + "Trust the information recorded in MS DB.", + "Trust the information recorded in PS DB.", + "Ignore but log the inconsistency.", + }, + }), + }, + "interactive finding: orphaned container storage": { + rpt: &chkpb.CheckReport{ + Seq: 973024752426024964, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Rank: 1, + PoolUuid: "06a6ecff-5252-4ce3-a7e4-c7d875fb7495", + ContUuid: "fda598ea-6794-49ea-97af-ac787c2585a9", + Timestamp: "Mon Dec 5 20:47:42 2022", + Msg: "Check engine detects orphan container 06a6ecff-5252-4ce3-a7e4-c7d875fb7495/fda598ea-6794-49ea-97af-ac787c2585a9", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_DISCARD, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Destroy the orphan container to release space [suggested].", + "Keep the orphan container on engines, repair nothing.", + }, + ActMsgs: []string{ + "Discard the unrecognized element: pool service, pool itself, container, and so on.", + "Ignore but log the inconsistency.", + }, + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 973024752426024964, + Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + Rank: 1, + PoolUuid: 
"06a6ecff-5252-4ce3-a7e4-c7d875fb7495", + ContUuid: "fda598ea-6794-49ea-97af-ac787c2585a9", + Timestamp: "Mon Dec 5 20:47:42 2022", + Msg: "Check engine detects orphan container 06a6ecff-5252-4ce3-a7e4-c7d875fb7495/fda598ea-6794-49ea-97af-ac787c2585a9", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_DISCARD, + chkpb.CheckInconsistAction_CIA_IGNORE, + }, + ActDetails: []string{ + "Destroy the orphan container to release space [suggested].", + "Keep the orphan container on engines, repair nothing.", + }, + ActMsgs: []string{ + "Discard the unrecognized element: pool service, pool itself, container, and so on.", + "Ignore but log the inconsistency.", + }, + }), + }, + "interactive finding: container label mismatch": { + rpt: &chkpb.CheckReport{ + Seq: 972775323717861377, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "9614ebfb-cbad-4250-a4e4-d24b7b70d85e", + ContUuid: "18b9b418-211c-455f-aa42-0cc13dedcff9", + Timestamp: "Mon Dec 5 16:27:56 2022\n", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs foo (property).\n", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + chkpb.CheckInconsistAction_CIA_TRUST_TARGET, + }, + ActDetails: []string{ + "Repair the container label in container property [suggested].6", + "Keep the inconsistent container label, repair nothing.0", + "Repair the container label in container service.", + }, + ActMsgs: []string{ + "Trust the information recorded in PS DB.", + "Ignore but log the inconsistency.", + "Trust the information recorded by target(s).", + }, + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972775323717861377, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_INTERACT, + PoolUuid: "9614ebfb-cbad-4250-a4e4-d24b7b70d85e", + ContUuid: 
"18b9b418-211c-455f-aa42-0cc13dedcff9", + Timestamp: "Mon Dec 5 16:27:56 2022", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs foo (property).", + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction_CIA_TRUST_PS, + chkpb.CheckInconsistAction_CIA_IGNORE, + chkpb.CheckInconsistAction_CIA_TRUST_TARGET, + }, + ActDetails: []string{ + "Repair the container label in container property [suggested].6", + "Keep the inconsistent container label, repair nothing.0", + "Repair the container label in container service.", + }, + ActMsgs: []string{ + "Trust the information recorded in PS DB.", + "Ignore but log the inconsistency.", + "Trust the information recorded by target(s).", + }, + }), + }, + } { + t.Run(name, func(t *testing.T) { + f := checker.NewFinding(tc.rpt) + + gotFinding := checker.AnnotateFinding(f) + if tc.expFinding == nil && gotFinding == nil { + return + } + + if diff := cmp.Diff(tc.expFinding, gotFinding, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected finding (-want +got):\n%s", diff) + } + }) + } +} diff --git a/src/control/system/checker/mocks.go b/src/control/system/checker/mocks.go new file mode 100644 index 00000000000..a13a8e42ad3 --- /dev/null +++ b/src/control/system/checker/mocks.go @@ -0,0 +1,38 @@ +// +// (C) Copyright 2022 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package checker + +import ( + "math/rand" + "time" + + chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/common/test" +) + +func MockFinding(idx ...int) *Finding { + if len(idx) == 0 { + idx = []int{rand.Int()} + } + return &Finding{ + CheckReport: chkpb.CheckReport{ + Seq: uint64(idx[0]), + Class: chkpb.CheckInconsistClass(rand.Int31n(int32(len(chkpb.CheckInconsistClass_name)))), + Action: chkpb.CheckInconsistAction(rand.Int31n(int32(len(chkpb.CheckInconsistAction_name)))), + Rank: uint32(idx[0]), + Target: uint32(idx[0]), + PoolUuid: test.MockUUID(int32(idx[0])), + ContUuid: test.MockUUID(int32(idx[0])), + Timestamp: time.Now().String(), + ActChoices: []chkpb.CheckInconsistAction{ + chkpb.CheckInconsistAction(rand.Int31n(int32(len(chkpb.CheckInconsistAction_name)))), + chkpb.CheckInconsistAction(rand.Int31n(int32(len(chkpb.CheckInconsistAction_name)))), + chkpb.CheckInconsistAction(rand.Int31n(int32(len(chkpb.CheckInconsistAction_name)))), + }, + }, + } +} diff --git a/src/control/system/member.go b/src/control/system/member.go index 84a1136f035..9ff5b6f720d 100644 --- a/src/control/system/member.go +++ b/src/control/system/member.go @@ -47,8 +47,10 @@ const ( MemberStateUnresponsive MemberState = 0x0100 // MemberStateAdminExcluded indicates that the rank has been administratively excluded. MemberStateAdminExcluded MemberState = 0x0200 + // MemberStateCheckerStarted indicates that the rank is running in checker mode. + MemberStateCheckerStarted MemberState = 0x0400 // MemberStateMax is the last entry indicating end of list. - MemberStateMax MemberState = 0x0400 + MemberStateMax MemberState = 0x0800 // ExcludedMemberFilter defines the state(s) to be used when determining // whether or not a member should be excluded from CaRT group map updates. 
@@ -58,6 +60,8 @@ const ( AvailableMemberFilter = MemberStateReady | MemberStateJoined // AllMemberFilter will match all valid member states. AllMemberFilter = MemberState(0xFFFF) + // NonExcludedMemberFilter matches all members that don't match the ExcludedMemberFilter. + NonExcludedMemberFilter = AllMemberFilter ^ ExcludedMemberFilter ) func (ms MemberState) String() string { @@ -82,6 +86,8 @@ func (ms MemberState) String() string { return "Errored" case MemberStateUnresponsive: return "Unresponsive" + case MemberStateCheckerStarted: + return "CheckerStarted" default: return "Unknown" } @@ -109,6 +115,8 @@ func MemberStateFromString(in string) MemberState { return MemberStateErrored case "unresponsive": return MemberStateUnresponsive + case "checkerstarted": + return MemberStateCheckerStarted default: return MemberStateUnknown } @@ -149,6 +157,10 @@ func (ms MemberState) isTransitionIllegal(to MemberState) bool { MemberStateJoined: true, MemberStateStopping: true, }, + MemberStateCheckerStarted: { + MemberStateReady: true, + MemberStateJoined: true, + }, }[ms][to] } diff --git a/src/control/system/membership.go b/src/control/system/membership.go index dbf7238805c..d1b6e298b61 100644 --- a/src/control/system/membership.go +++ b/src/control/system/membership.go @@ -104,6 +104,7 @@ type JoinRequest struct { SecondaryFabricContexts []uint32 FaultDomain *FaultDomain Incarnation uint64 + CheckMode bool } // JoinResponse contains information returned from join membership update. 
@@ -165,7 +166,11 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { } resp.PrevState = curMember.State - curMember.State = MemberStateJoined + if req.CheckMode { + curMember.State = MemberStateCheckerStarted + } else { + curMember.State = MemberStateJoined + } curMember.Info = "" curMember.Addr = req.ControlAddr curMember.PrimaryFabricURI = req.PrimaryFabricURI diff --git a/src/control/system/properties.go b/src/control/system/properties.go index 69d2355c68e..6619f311d1b 100644 --- a/src/control/system/properties.go +++ b/src/control/system/properties.go @@ -110,6 +110,11 @@ func GetUserProperties(db SysAttrGetter, sysProps daos.SystemPropertyMap, keys [ return userProps, nil } +// SetUserProperty sets a single user-visible property for the supplied key/value. +func SetUserProperty(db SysAttrSetter, sysProps daos.SystemPropertyMap, key, value string) error { + return SetUserProperties(db, sysProps, map[string]string{key: value}) +} + // GetUserProperty returns a single user-visible property for the supplied key, or // an error if the key is unknown. 
func GetUserProperty(db SysAttrGetter, sysProps daos.SystemPropertyMap, key string) (val string, err error) { diff --git a/src/control/system/raft/database.go b/src/control/system/raft/database.go index 580bd7b41a9..818f709e5b6 100644 --- a/src/control/system/raft/database.go +++ b/src/control/system/raft/database.go @@ -75,6 +75,7 @@ type ( MapVersion uint32 Members *MemberDatabase Pools *PoolDatabase + Checker *CheckerDatabase System *SystemDatabase SchemaVersion uint } @@ -262,6 +263,9 @@ func NewDatabase(log logging.Logger, cfg *DatabaseConfig) (*Database, error) { Uuids: make(PoolUuidMap), Labels: make(PoolLabelMap), }, + Checker: &CheckerDatabase{ + Findings: make(CheckerFindingMap), + }, System: &SystemDatabase{ Attributes: make(map[string]string), }, @@ -797,6 +801,8 @@ func (db *Database) UpdateMember(m *system.Member) error { db.Lock() defer db.Unlock() + db.log.Tracef("updating member: %+v", m) + _, err := db.FindMemberByUUID(m.UUID) if err != nil { return err diff --git a/src/control/system/raft/database_checker.go b/src/control/system/raft/database_checker.go new file mode 100644 index 00000000000..6fa13acead5 --- /dev/null +++ b/src/control/system/raft/database_checker.go @@ -0,0 +1,219 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package raft + +import ( + "github.com/pkg/errors" + "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/system/checker" +) + +var ( + errFindingExists = errors.New("finding already exists") +) + +type ( + errFindingNotFound struct { + seq uint64 + } + + // CheckerFindingMap allows the lookup of a Finding by its sequence number. + CheckerFindingMap map[uint64]*checker.Finding + + // CheckerDatabase is the database containing all checker Findings. 
+ CheckerDatabase struct { + Findings CheckerFindingMap + } +) + +func (e *errFindingNotFound) Error() string { + return errors.Errorf("finding 0x%x not found", e.seq).Error() +} + +// ErrFindingNotFound creates an error that indicates a specified finding wasn't found in the +// database. +func ErrFindingNotFound(seq uint64) error { + return &errFindingNotFound{seq: seq} +} + +// IsFindingNotFound checks whether an error is an ErrFindingNotFound. +func IsFindingNotFound(err error) bool { + _, ok := errors.Cause(err).(*errFindingNotFound) + return ok +} + +func copyFinding(in *checker.Finding) (out *checker.Finding) { + out = new(checker.Finding) + proto.Merge(&out.CheckReport, &in.CheckReport) + + return +} + +func (cdb *CheckerDatabase) resetFindings() { + cdb.Findings = make(CheckerFindingMap) +} + +func (cdb *CheckerDatabase) addFinding(finding *checker.Finding) error { + if _, found := cdb.Findings[finding.Seq]; found { + return errFindingExists + } + cdb.Findings[finding.Seq] = copyFinding(finding) + + return nil +} + +func (cdb *CheckerDatabase) updateFinding(finding *checker.Finding) error { + if _, found := cdb.Findings[finding.Seq]; !found { + return ErrFindingNotFound(finding.Seq) + } + // TODO: Selectively update fields? + cdb.Findings[finding.Seq] = finding + + return nil +} + +func (cdb *CheckerDatabase) removeFinding(finding *checker.Finding) error { + if _, found := cdb.Findings[finding.Seq]; !found { + return ErrFindingNotFound(finding.Seq) + } + + delete(cdb.Findings, finding.Seq) + return nil +} + +// AddCheckerFinding adds a finding to the database. +func (db *Database) AddCheckerFinding(finding *checker.Finding) error { + db.Lock() + defer db.Unlock() + + return db.submitCheckerUpdate(raftOpAddCheckerFinding, finding) +} + +// AddOrUpdateCheckerFinding updates a finding in the database if it is already stored, or stores +// it if not. 
+func (db *Database) AddOrUpdateCheckerFinding(finding *checker.Finding) error { + db.Lock() + defer db.Unlock() + + if _, err := db.GetCheckerFinding(finding.Seq); IsFindingNotFound(err) { + return db.submitCheckerUpdate(raftOpAddCheckerFinding, finding) + } + + return db.submitCheckerUpdate(raftOpUpdateCheckerFinding, finding) +} + +// UpdateCheckerFinding updates a finding that is already in the database. +func (db *Database) UpdateCheckerFinding(finding *checker.Finding) error { + db.Lock() + defer db.Unlock() + + if _, err := db.GetCheckerFinding(finding.Seq); err != nil { + return err + } + return db.submitCheckerUpdate(raftOpUpdateCheckerFinding, finding) +} + +// RemoveCheckerFindingsForPools removes any findings in the database associated with one or more +// pool IDs. +func (db *Database) RemoveCheckerFindingsForPools(poolIDs ...string) error { + db.Lock() + defer db.Unlock() + + poolIDSet := common.NewStringSet(poolIDs...) + for seq, f := range db.data.Checker.Findings { + if poolIDSet.Has(f.PoolUuid) || poolIDSet.Has(f.PoolLabel) { + delete(db.data.Checker.Findings, seq) + } + } + return nil +} + +// RemoveCheckerFinding removes a given finding from the checker database. +func (db *Database) RemoveCheckerFinding(finding *checker.Finding) error { + db.Lock() + defer db.Unlock() + + if _, err := db.GetCheckerFinding(finding.Seq); err != nil { + return err + } + return db.submitCheckerUpdate(raftOpRemoveCheckerFinding, finding) +} + +// SetCheckerFindingAction sets the action chosen for a giving finding. 
+func (db *Database) SetCheckerFindingAction(seq uint64, action int32) error { + if _, ok := chk.CheckInconsistAction_name[action]; !ok { + return errors.Errorf("invalid action %d", action) + } + chkAction := chk.CheckInconsistAction(action) + + db.Lock() + defer db.Unlock() + + f, err := db.GetCheckerFinding(seq) + if err != nil { + return err + } + + for i, d := range f.ActChoices { + if d != chkAction { + continue + } + f.Action = chkAction + if len(f.ActMsgs) > i { + f.ActMsgs = []string{f.ActMsgs[i]} + } + f.ActChoices = nil + } + + return db.submitCheckerUpdate(raftOpUpdateCheckerFinding, f) +} + +// ResetCheckerData clears all findings in the database. +func (db *Database) ResetCheckerData() error { + db.Lock() + defer db.Unlock() + + return db.submitCheckerUpdate(raftOpClearCheckerFindings, nil) +} + +// GetCheckerFindings fetches findings from the database by sequence number, or fetches all of them +// if no list is provided. +func (db *Database) GetCheckerFindings(searchList ...uint64) ([]*checker.Finding, error) { + db.data.RLock() + defer db.data.RUnlock() + + out := make([]*checker.Finding, 0, len(db.data.Checker.Findings)) + if len(searchList) == 0 { + for _, finding := range db.data.Checker.Findings { + out = append(out, copyFinding(finding)) + } + } else { + for _, seq := range searchList { + finding, found := db.data.Checker.Findings[seq] + if !found { + return nil, errors.Errorf("finding 0x%x not found", seq) + } + out = append(out, copyFinding(finding)) + } + } + return out, nil +} + +// GetCheckerFinding looks up a finding by sequence number. 
+func (db *Database) GetCheckerFinding(seq uint64) (*checker.Finding, error) { + db.data.RLock() + defer db.data.RUnlock() + + if f, found := db.data.Checker.Findings[seq]; found { + return copyFinding(f), nil + } + + return nil, ErrFindingNotFound(seq) +} diff --git a/src/control/system/raft/database_test.go b/src/control/system/raft/database_test.go index 9f1f0a1574b..0712986840e 100644 --- a/src/control/system/raft/database_test.go +++ b/src/control/system/raft/database_test.go @@ -25,6 +25,7 @@ import ( "github.com/google/uuid" "github.com/hashicorp/raft" "github.com/pkg/errors" + "google.golang.org/protobuf/testing/protocmp" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" @@ -34,6 +35,7 @@ import ( "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/system" . "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" ) func waitForLeadership(ctx context.Context, t *testing.T, db *Database, gained bool) { @@ -60,7 +62,7 @@ func TestSystem_Database_filterMembers(t *testing.T) { memberStates := []MemberState{ MemberStateUnknown, MemberStateAwaitFormat, MemberStateStarting, MemberStateReady, MemberStateJoined, MemberStateStopping, MemberStateStopped, - MemberStateExcluded, MemberStateErrored, MemberStateUnresponsive, + MemberStateExcluded, MemberStateErrored, MemberStateUnresponsive, MemberStateAdminExcluded, } for i, ms := range memberStates { @@ -104,6 +106,13 @@ func TestSystem_Database_filterMembers(t *testing.T) { } } }, + "nonexcluded filter": func(t *testing.T) { + matches := db.filterMembers(NonExcludedMemberFilter) + matchLen := len(matches) + if matchLen != len(memberStates)-4 { + t.Fatalf("expected %d members to match; got %d", len(memberStates)-4, matchLen) + } + }, } { t.Run(name, func(t *testing.T) { buf.Reset() @@ -226,6 +235,7 @@ func TestSystem_Database_SnapshotRestore(t *testing.T) { maxRanks := 2048 maxPools := 1024 
maxAttrs := 4096 + maxFindings := 512 log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -280,6 +290,18 @@ func TestSystem_Database_SnapshotRestore(t *testing.T) { (*fsm)(db0).Apply(rl) } + for i := 0; i < maxFindings; i++ { + f := checker.MockFinding(i) + data, err := createRaftUpdate(raftOpAddCheckerFinding, f) + if err != nil { + t.Fatal(err) + } + rl := &raft.Log{ + Data: data, + } + (*fsm)(db0).Apply(rl) + } + attrs := make(map[string]string) for i := 0; i < maxAttrs; i++ { attrs[fmt.Sprintf("prop%04d", i)] = fmt.Sprintf("value%04d", i) @@ -313,6 +335,7 @@ func TestSystem_Database_SnapshotRestore(t *testing.T) { cmpopts.IgnoreUnexported(dbData{}, Member{}, PoolServiceStorage{}), cmpopts.IgnoreFields(dbData{}, "RWMutex"), cmpopts.IgnoreFields(PoolServiceStorage{}, "Mutex"), + protocmp.Transform(), } if diff := cmp.Diff(db0.data, db1.data, cmpOpts...); diff != "" { t.Fatalf("db differs after restore (-want, +got):\n%s\n", diff) diff --git a/src/control/system/raft/raft.go b/src/control/system/raft/raft.go index 6a89c64a764..22906cb2073 100644 --- a/src/control/system/raft/raft.go +++ b/src/control/system/raft/raft.go @@ -25,6 +25,7 @@ import ( "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/system" + "github.com/daos-stack/daos/src/control/system/checker" ) // This file contains the "guts" of the new MS database. 
The basic theory @@ -46,6 +47,10 @@ const ( raftOpRemovePoolService raftOpIncMapVer raftOpUpdateSystemAttrs + raftOpAddCheckerFinding + raftOpUpdateCheckerFinding + raftOpRemoveCheckerFinding + raftOpClearCheckerFindings sysDBFile = "daos_system.db" ) @@ -81,6 +86,10 @@ func (ro raftOp) String() string { "removePoolService", "incMapVer", "updateSystemAttrs", + "addCheckerFinding", + "updateCheckerFinding", + "removeCheckerFinding", + "clearCheckerFindings", }[ro] } @@ -460,6 +469,15 @@ func (db *Database) submitSystemAttrsUpdate(props map[string]string) error { return db.submitRaftUpdate(data) } +// submitCheckerUpdate submits the given system checker update. +func (db *Database) submitCheckerUpdate(op raftOp, f *checker.Finding) error { + data, err := createRaftUpdate(op, f) + if err != nil { + return err + } + return db.submitRaftUpdate(data) +} + // submitRaftUpdate submits the serialized operation to the raft service. func (db *Database) submitRaftUpdate(data []byte) error { return db.raft.withReadLock(func(svc raftService) error { @@ -517,6 +535,8 @@ func (f *fsm) Apply(l *raft.Log) interface{} { f.data.applyPoolUpdate(c.Op, c.Data, f.EmergencyShutdown) case raftOpUpdateSystemAttrs: f.data.applySystemUpdate(c.Op, c.Data, f.EmergencyShutdown) + case raftOpAddCheckerFinding, raftOpUpdateCheckerFinding, raftOpRemoveCheckerFinding, raftOpClearCheckerFindings: + f.data.applyCheckerUpdate(c.Op, c.Data, f.EmergencyShutdown) default: f.EmergencyShutdown(errors.Errorf("unhandled Apply operation: %d", c.Op)) return nil @@ -623,6 +643,48 @@ func (d *dbData) applySystemUpdate(op raftOp, data []byte, panicFn func(error)) } } +// applyCheckerUpdate is responsible for applying the checker update +// operation to the database. 
+func (d *dbData) applyCheckerUpdate(op raftOp, data []byte, panicFn func(error)) { + if op == raftOpClearCheckerFindings { + d.Lock() + defer d.Unlock() + d.Checker.resetFindings() + return + } + + f := new(checker.Finding) + if err := json.Unmarshal(data, f); err != nil { + panicFn(errors.Wrap(err, "failed to decode checker finding update")) + return + } + + d.Lock() + defer d.Unlock() + + // TODO: Consider whether or not these should be fatal errors. + switch op { + case raftOpAddCheckerFinding: + if err := d.Checker.addFinding(f); err != nil { + panicFn(err) + return + } + case raftOpUpdateCheckerFinding: + if err := d.Checker.updateFinding(f); err != nil { + panicFn(err) + return + } + case raftOpRemoveCheckerFinding: + if err := d.Checker.removeFinding(f); err != nil { + panicFn(err) + return + } + default: + panicFn(errors.Errorf("unhandled Checker Apply operation: %d", op)) + return + } +} + // Snapshot is called to support log compaction, so that we don't have to keep // every log entry from the start of the system. 
Instead, the raft service periodically // creates a point-in-time snapshot which can be used to restore the current state, or @@ -658,6 +720,7 @@ func (f *fsm) Restore(rc io.ReadCloser) error { f.data.NextRank = db.data.NextRank f.data.MapVersion = db.data.MapVersion f.data.System = db.data.System + f.data.Checker = db.data.Checker f.data.Version = db.data.Version f.data.Unlock() f.log.Debugf("db snapshot loaded (map version %d; data version %d)", db.data.MapVersion, db.data.Version) diff --git a/src/control/system/raft/testdata/raft_recovery/daos_system.db b/src/control/system/raft/testdata/raft_recovery/daos_system.db index 769eef4a1d1..7f097491608 100644 Binary files a/src/control/system/raft/testdata/raft_recovery/daos_system.db and b/src/control/system/raft/testdata/raft_recovery/daos_system.db differ diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin deleted file mode 100644 index 15ec7c14c58..00000000000 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin +++ /dev/null @@ -1 +0,0 @@ 
-{"Version":8,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"8a5818a9-4759-4762-95cc-71a43278e70d","2":"a111e09b-407e-41b2-9677-0db20a78bc1f","3":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","4":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","5":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","6":"fe344a6f-7255-42af-b75d-126e6d862285","7":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","8":"2395840f-ebc6-4b71-807c-9dcf644e0623"},"Uuids":{"2395840f-ebc6-4b71-807c-9dcf644e0623":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"2395840f-ebc6-4b71-807c-9dcf644e0623","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.147351335Z"},"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14418846Z"},"8a5818a9-4759-4762-95cc-71a43278e70d":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"8a5818a9-4759-4762-95cc-71a43278e70d","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.141166341Z"},"a111e09b-407e-41b2-9677-0db20a78bc1f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"a111e09b-407e-41b2-9677-0db20a78bc1f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.142242063Z"},"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fabric_uri":"","secondary_fabr
ic_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145014303Z"},"c47c9082-7c0f-40fa-9ce1-c472d0b43920":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14317539Z"},"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.146615014Z"},"fe344a6f-7255-42af-b75d-126e6d862285":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"fe344a6f-7255-42af-b75d-126e6d862285","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145799472Z"}},"Addrs":{"127.0.0.1:10001":["8a5818a9-4759-4762-95cc-71a43278e70d","a111e09b-407e-41b2-9677-0db20a78bc1f","c47c9082-7c0f-40fa-9ce1-c472d0b43920","44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0"],"127.0.0.2:10001":["bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fe344a6f-7255-42af-b75d-126e6d862285","d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","2395840f-ebc6-4b71-807c-9dcf644e0623"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test",
"domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{},"Uuids":{},"Labels":{}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin deleted file mode 100644 index adcad90e3ef..00000000000 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin +++ /dev/null @@ -1 +0,0 @@ -{"Version":16,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"8a5818a9-4759-4762-95cc-71a43278e70d","2":"a111e09b-407e-41b2-9677-0db20a78bc1f","3":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","4":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","5":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","6":"fe344a6f-7255-42af-b75d-126e6d862285","7":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","8":"2395840f-ebc6-4b71-807c-9dcf644e0623"},"Uuids":{"2395840f-ebc6-4b71-807c-9dcf644e0623":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"2395840f-ebc6-4b71-807c-9dcf644e0623","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.147351335Z"},"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14418846Z"},"8a5818a9-4759-4762-95cc-71a43278e70d":{"addr":"127.0.0.1:10001","
state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"8a5818a9-4759-4762-95cc-71a43278e70d","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.141166341Z"},"a111e09b-407e-41b2-9677-0db20a78bc1f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"a111e09b-407e-41b2-9677-0db20a78bc1f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.142242063Z"},"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145014303Z"},"c47c9082-7c0f-40fa-9ce1-c472d0b43920":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14317539Z"},"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.146615014Z"},"fe344a6f-7255-42af-b75d-126e6d862285":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"fe344a6f-7255-42af-b75d-126e6d862285","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145799472Z"}},"Addrs":{"127.
0.0.1:10001":["8a5818a9-4759-4762-95cc-71a43278e70d","a111e09b-407e-41b2-9677-0db20a78bc1f","c47c9082-7c0f-40fa-9ce1-c472d0b43920","44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0"],"127.0.0.2:10001":["bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fe344a6f-7255-42af-b75d-126e6d862285","d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","2395840f-ebc6-4b71-807c-9dcf644e0623"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{"0":["67efa2cb-7fca-4528-907c-f846186d1fd4"],"2":["7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","caf380e7-1ab7-40d2-9100-bf1a0dad52a7"],"3":["a4ec2566-9a5f-447a-bd08-159d1db617fd","7df843df-ba07-4fe3-80c2-86db369ff1f1"],"5":["4ff31c2c-5515-48ed-95bb-549143892ed7"],"6":["a4ec2566-9a5f-447a-bd08-159d1db617fd","7df843df-ba07-4fe3-80c2-86db369ff1f1","678fa845-ce21-47ef-b802-df491cfbf2fb"]},"Uuids":{"155ebf05-dc82-449c-ae62-4da5fcac783f":{"PoolUUID":"155ebf05-dc82-449c-ae62-4da5fcac783f","PoolLabel":"pool0000","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.154074898Z"},"4ff31c2c-5515-48ed-95bb-549143892ed7":{"PoolUUID":"4ff31c2c-5515-48ed-95bb-549143892ed7","PoolLabel":"pool0001","State":1,"Rep
licas":[5],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.155016853Z"},"678fa845-ce21-47ef-b802-df491cfbf2fb":{"PoolUUID":"678fa845-ce21-47ef-b802-df491cfbf2fb","PoolLabel":"pool0006","State":1,"Replicas":[6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.159082982Z"},"67efa2cb-7fca-4528-907c-f846186d1fd4":{"PoolUUID":"67efa2cb-7fca-4528-907c-f846186d1fd4","PoolLabel":"pool0003","State":1,"Replicas":[0],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.15665199Z"},"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e":{"PoolUUID":"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","PoolLabel":"pool0002","State":1,"Replicas":[2],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.155906588Z"},"7df843df-ba07-4fe3-80c2-86db369ff1f1":{"PoolUUID":"7df843df-ba07-4fe3-80c2-86db369ff1f1","PoolLabel":"pool0005","State":1,"Replicas":[3,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.158300971Z"},"a4ec2566-9a5f-447a-bd08-159d1db617fd":{"PoolUUID":"a4ec2566-9a5f-447a-bd08-159d1db617fd","PoolLabel":"pool0004","State":1,"Replicas":[3,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.157413619Z"},"caf380e7-1ab7-40d2-9100-bf1a0dad52a7":{"PoolUUID":"caf380e7-1ab7-40d2-9100-bf1a0dad52a7","PoolLabel":"pool0007","State":1,"Replicas":[2],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.159810941Z"}},"Labels":{"pool0000":"155ebf05-dc82-449c-ae62-4da5fcac783f","pool0001":"4ff31c2c-5515-48ed-95bb-549143892ed7","pool0002":"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","pool0003":"67efa2cb-7fca-4528-907c-f8
46186d1fd4","pool0004":"a4ec2566-9a5f-447a-bd08-159d1db617fd","pool0005":"7df843df-ba07-4fe3-80c2-86db369ff1f1","pool0006":"678fa845-ce21-47ef-b802-df491cfbf2fb","pool0007":"caf380e7-1ab7-40d2-9100-bf1a0dad52a7"}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json b/src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/meta.json similarity index 51% rename from src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json rename to src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/meta.json index 8c0a130d409..0deb71db6b4 100644 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/meta.json @@ -1 +1 @@ -{"Version":1,"ID":"2-11-1665528548388","Index":11,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":4441,"CRC":"tIArpKQ32y0="} +{"Version":1,"ID":"2-20-1710888709486","Index":20,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":4469,"CRC":"iy4czkUrmmQ="} diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/state.bin new file mode 100644 index 00000000000..0f7c06c2753 --- /dev/null +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-20-1710888709486/state.bin @@ -0,0 +1 @@ 
+{"Version":8,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"d50800c9-2104-4ba5-996c-55bcf7a48292","2":"692e11da-0e05-4fd6-be95-84ce57f7a553","3":"85dbe95d-fe54-418c-a547-cddb4af352d8","4":"cc537a2a-5566-42f7-a151-69b414d99425","5":"e267adb5-d205-4c15-b0c8-4e3e75200d48","6":"367ce851-5fc1-4649-a40a-a5d7b097191c","7":"e7c73998-5f99-4dbe-9661-c53139ffe413","8":"16f9d2f3-97e4-4c02-bb63-eac9bc6af509"},"Uuids":{"16f9d2f3-97e4-4c02-bb63-eac9bc6af509":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"16f9d2f3-97e4-4c02-bb63-eac9bc6af509","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.902982689Z"},"367ce851-5fc1-4649-a40a-a5d7b097191c":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"367ce851-5fc1-4649-a40a-a5d7b097191c","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.897949913Z"},"692e11da-0e05-4fd6-be95-84ce57f7a553":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"692e11da-0e05-4fd6-be95-84ce57f7a553","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.886795929Z"},"85dbe95d-fe54-418c-a547-cddb4af352d8":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"85dbe95d-fe54-418c-a547-cddb4af352d8","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.889541042Z"},"cc537a2a-5566-42f7-a151-69b414d99425":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"cc537a2a-5566-42f7-a151-69b414d99425","fabric_uri":"","secondary_fab
ric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.891880278Z"},"d50800c9-2104-4ba5-996c-55bcf7a48292":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"d50800c9-2104-4ba5-996c-55bcf7a48292","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.883262135Z"},"e267adb5-d205-4c15-b0c8-4e3e75200d48":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"e267adb5-d205-4c15-b0c8-4e3e75200d48","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.895441851Z"},"e7c73998-5f99-4dbe-9661-c53139ffe413":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"e7c73998-5f99-4dbe-9661-c53139ffe413","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.900455403Z"}},"Addrs":{"127.0.0.1:10001":["d50800c9-2104-4ba5-996c-55bcf7a48292","692e11da-0e05-4fd6-be95-84ce57f7a553","85dbe95d-fe54-418c-a547-cddb4af352d8","cc537a2a-5566-42f7-a151-69b414d99425"],"127.0.0.2:10001":["e267adb5-d205-4c15-b0c8-4e3e75200d48","367ce851-5fc1-4649-a40a-a5d7b097191c","e7c73998-5f99-4dbe-9661-c53139ffe413","16f9d2f3-97e4-4c02-bb63-eac9bc6af509"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test
","domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{},"Uuids":{},"Labels":{}},"Checker":{"Findings":{}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json b/src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/meta.json similarity index 51% rename from src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json rename to src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/meta.json index d6e2fccb8d4..dd094666212 100644 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/meta.json @@ -1 +1 @@ -{"Version":1,"ID":"2-19-1665528549936","Index":19,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":7424,"CRC":"WB3vb8m2U3w="} +{"Version":1,"ID":"2-44-1710888711237","Index":44,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":7466,"CRC":"5AdDOZ0IrcY="} diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/state.bin new file mode 100644 index 00000000000..e4290729cfa --- /dev/null +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-44-1710888711237/state.bin @@ -0,0 +1 @@ 
+{"Version":16,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"d50800c9-2104-4ba5-996c-55bcf7a48292","2":"692e11da-0e05-4fd6-be95-84ce57f7a553","3":"85dbe95d-fe54-418c-a547-cddb4af352d8","4":"cc537a2a-5566-42f7-a151-69b414d99425","5":"e267adb5-d205-4c15-b0c8-4e3e75200d48","6":"367ce851-5fc1-4649-a40a-a5d7b097191c","7":"e7c73998-5f99-4dbe-9661-c53139ffe413","8":"16f9d2f3-97e4-4c02-bb63-eac9bc6af509"},"Uuids":{"16f9d2f3-97e4-4c02-bb63-eac9bc6af509":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"16f9d2f3-97e4-4c02-bb63-eac9bc6af509","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.902982689Z"},"367ce851-5fc1-4649-a40a-a5d7b097191c":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"367ce851-5fc1-4649-a40a-a5d7b097191c","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.897949913Z"},"692e11da-0e05-4fd6-be95-84ce57f7a553":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"692e11da-0e05-4fd6-be95-84ce57f7a553","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.886795929Z"},"85dbe95d-fe54-418c-a547-cddb4af352d8":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"85dbe95d-fe54-418c-a547-cddb4af352d8","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.889541042Z"},"cc537a2a-5566-42f7-a151-69b414d99425":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"cc537a2a-5566-42f7-a151-69b414d99425","fabric_uri":"","secondary_fa
bric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.891880278Z"},"d50800c9-2104-4ba5-996c-55bcf7a48292":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"d50800c9-2104-4ba5-996c-55bcf7a48292","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.883262135Z"},"e267adb5-d205-4c15-b0c8-4e3e75200d48":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"e267adb5-d205-4c15-b0c8-4e3e75200d48","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.895441851Z"},"e7c73998-5f99-4dbe-9661-c53139ffe413":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"e7c73998-5f99-4dbe-9661-c53139ffe413","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2024-03-19T22:51:47.900455403Z"}},"Addrs":{"127.0.0.1:10001":["d50800c9-2104-4ba5-996c-55bcf7a48292","692e11da-0e05-4fd6-be95-84ce57f7a553","85dbe95d-fe54-418c-a547-cddb4af352d8","cc537a2a-5566-42f7-a151-69b414d99425"],"127.0.0.2:10001":["e267adb5-d205-4c15-b0c8-4e3e75200d48","367ce851-5fc1-4649-a40a-a5d7b097191c","e7c73998-5f99-4dbe-9661-c53139ffe413","16f9d2f3-97e4-4c02-bb63-eac9bc6af509"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","tes
t","domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{"0":["3e04eb11-d9da-4913-8ad9-43ee43a252d5","c869c3a6-2a70-4543-89c0-da47dfb92303","7099e240-9d34-42ef-a762-25f525d328ee"],"1":["ab7db85b-57e5-4dc4-b604-4b5458020c6a","7099e240-9d34-42ef-a762-25f525d328ee"],"3":["c869c3a6-2a70-4543-89c0-da47dfb92303"],"4":["687b65a7-19d6-40ef-9cf7-12a45ae17270"],"6":["687b65a7-19d6-40ef-9cf7-12a45ae17270"]},"Uuids":{"15b67f96-f674-4f0b-8ee0-c9ff542de129":{"PoolUUID":"15b67f96-f674-4f0b-8ee0-c9ff542de129","PoolLabel":"pool0007","State":"Ready","Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.952013491Z"},"1aaa8dde-b471-48de-b7d2-ca18939ec6aa":{"PoolUUID":"1aaa8dde-b471-48de-b7d2-ca18939ec6aa","PoolLabel":"pool0005","State":"Ready","Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.943263063Z"},"3e04eb11-d9da-4913-8ad9-43ee43a252d5":{"PoolUUID":"3e04eb11-d9da-4913-8ad9-43ee43a252d5","PoolLabel":"pool0001","State":"Ready","Replicas":[0],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.927044641Z"},"687b65a7-19d6-40ef-9cf7-12a45ae17270":{"PoolUUID":"687b65a7-19d6-40ef-9cf7-12a45ae17270","PoolLabel":"pool0002","State":"Ready","Replicas":[4,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.931310931Z"},"7099e240-9d34-42ef-a762-25f525d328ee":{"PoolUUID":"7099e240-9d34-42ef-a762-25f525d328ee","PoolLabel":"pool0006","State":"Ready","Replicas":[0,1],
"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.947681515Z"},"ab7db85b-57e5-4dc4-b604-4b5458020c6a":{"PoolUUID":"ab7db85b-57e5-4dc4-b604-4b5458020c6a","PoolLabel":"pool0003","State":"Ready","Replicas":[1],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.934622936Z"},"c869c3a6-2a70-4543-89c0-da47dfb92303":{"PoolUUID":"c869c3a6-2a70-4543-89c0-da47dfb92303","PoolLabel":"pool0004","State":"Ready","Replicas":[3,0],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.938971457Z"},"df8a3571-8851-414e-b4e3-518c4feed10f":{"PoolUUID":"df8a3571-8851-414e-b4e3-518c4feed10f","PoolLabel":"pool0000","State":"Ready","Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2024-03-19T22:51:49.922749044Z"}},"Labels":{"pool0000":"df8a3571-8851-414e-b4e3-518c4feed10f","pool0001":"3e04eb11-d9da-4913-8ad9-43ee43a252d5","pool0002":"687b65a7-19d6-40ef-9cf7-12a45ae17270","pool0003":"ab7db85b-57e5-4dc4-b604-4b5458020c6a","pool0004":"c869c3a6-2a70-4543-89c0-da47dfb92303","pool0005":"1aaa8dde-b471-48de-b7d2-ca18939ec6aa","pool0006":"7099e240-9d34-42ef-a762-25f525d328ee","pool0007":"15b67f96-f674-4f0b-8ee0-c9ff542de129"}},"Checker":{"Findings":{}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/vendor/github.com/desertbit/closer/v3/.gitignore b/src/control/vendor/github.com/desertbit/closer/v3/.gitignore new file mode 100644 index 00000000000..33599436c33 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/.gitignore @@ -0,0 +1,5 @@ +*~ +.DS_Store +._.DS_Store +.idea/ +sample/sample \ No newline at end of file diff --git a/src/control/vendor/github.com/desertbit/closer/v3/.travis.yml b/src/control/vendor/github.com/desertbit/closer/v3/.travis.yml 
new file mode 100644 index 00000000000..1ae3698fc2b --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/.travis.yml @@ -0,0 +1,14 @@ +language: go + +go: + - 1.12.x + - tip + +before_install: + - go get -t -v ./... + +script: + - go test -race -coverprofile=coverage.txt -covermode=atomic + +after_success: + - bash <(curl -s https://codecov.io/bash) \ No newline at end of file diff --git a/src/control/vendor/github.com/desertbit/closer/v3/AUTHORS b/src/control/vendor/github.com/desertbit/closer/v3/AUTHORS new file mode 100644 index 00000000000..6a73d6644c4 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/AUTHORS @@ -0,0 +1,2 @@ +Roland Singer +Sebastian Borchers diff --git a/src/control/vendor/github.com/desertbit/closer/v3/LICENSE b/src/control/vendor/github.com/desertbit/closer/v3/LICENSE new file mode 100644 index 00000000000..d3d496285fc --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2018 Roland Singer +Copyright (c) 2018 Sebastian Borchers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/control/vendor/github.com/desertbit/closer/v3/README.md b/src/control/vendor/github.com/desertbit/closer/v3/README.md new file mode 100644 index 00000000000..7ef8db508bb --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/README.md @@ -0,0 +1,101 @@ +# Closer - A simple, thread-safe closer + +[![GoDoc](https://godoc.org/github.com/desertbit/closer?status.svg)](https://godoc.org/github.com/desertbit/closer) +[![Go Report Card](https://goreportcard.com/badge/github.com/desertbit/closer)](https://goreportcard.com/report/github.com/desertbit/closer) +[![coverage](https://codecov.io/gh/desertbit/closer/branch/master/graph/badge.svg)](https://codecov.io/gh/desertbit/closer/branch/master) +[![license](https://img.shields.io/github/license/desertbit/closer.svg)](https://opensource.org/licenses/MIT) + +This package aims to provide a simple and performance oriented mechanism to manage the graceful and reliable shutdown of an application, or parts of it. + +It can also be a handy alternative to the context package, though it does not solve the problem that common go libraries only accept context as a valid cancellation method. Therefore, you are only able to cancel "in-between" slow operations. + +### Examples +Check out the sample program for a good overview of this package's functionality. +##### Closing +Let us assume you want a server that should close its connection once it gets closed. We close the connection in the `onClose()` method of the server's closer and demonstrate that it does not matter how often you call `Close()`, the connection is closed exactly once. + +```go +type Server struct { + closer.Closer // Embedded + conn net.Conn +} + +func New() *Server { + // ... 
+ s := &Server { + conn: conn, + } + s.Closer = closer.New(s.onClose) + return s +} + +func (s *server) onClose() error { + return s.conn.Close() +} + +func main() { + s := New() + // ... + + // The s.onClose function will be called only once. + s.Close() + s.Close() +} +``` +##### OneWay +Now we want an application that (among other things) connects as a client to a remote server. In case the connection is interrupted, the app should continue to run and not fail. But if the app itself closes, of course we want to take down the client connection as well. +```go +type App struct { + closer.Closer +} + +func NewApp() *App { + return &App{ + Closer: closer.New() + } +} + +type Client struct { + closer.Closer + conn net.Conn +} + +func NewClient(cl closer.Closer) *Client { + c := &Client{ + Closer: cl, + } + c.OnClose(func() error { + return c.conn.Close() + }) + return c +} + +func main() { + a := NewApp() + // Close c, when a closes, but do not close a, when c closes. + c := NewClient(a.CloserOneWay()) + + c.Close() + // a still alive. +} +``` +##### TwoWay +Of course, there is the opposite to the OneWay closer that closes its parent as well. If we take the example from before, we can simply exchange the closer that is passed to the client. +```go +//... + +func main() { + a := NewApp() + // Close c, when a closes, and close a, when c closes. + c := NewClient(a.CloserTwoWay()) + + c.Close() + // a has been closed. +} +``` +### Documentation +Check out [godoc](https://godoc.org/github.com/desertbit/closer) for the documentation. +### Install +`go get github.com/desertbit/closer` +### Contribution +We love contributions, so feel free to do so! Coding and contribution guide lines will come in the future. Simply file a new issue, if you encounter problems with this package or have feature requests. 
\ No newline at end of file diff --git a/src/control/vendor/github.com/desertbit/closer/v3/closer.go b/src/control/vendor/github.com/desertbit/closer/v3/closer.go new file mode 100644 index 00000000000..5db317e9e91 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/closer/v3/closer.go @@ -0,0 +1,457 @@ +/* + * closer - A simple, thread-safe closer + * + * The MIT License (MIT) + * + * Copyright (c) 2019 Roland Singer + * Copyright (c) 2019 Sebastian Borchers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Package closer offers a simple, thread-safe closer. +// +// It allows to build up a tree of closing relationships, where you typically +// start with a root closer that branches into different children and +// children's children. When a parent closer spawns a child closer, the child +// either has a one-way or two-way connection to its parent. One-way children +// are closed when their parent closes. 
In addition, two-way children also close +// their parent, if they are closed themselves. +// +// A closer is also useful to ensure that certain dependencies, such as network +// connections, are reliably taken down, once the closer closes. +// In addition, a closer can be concurrently closed many times, without closing +// more than once, but still returning the errors to every caller. +// +// This allows to represent complex closing relationships and helps avoiding +// leaking goroutines, gracefully shutting down, etc. +package closer + +import ( + "context" + "fmt" + "sync" + + multierror "github.com/hashicorp/go-multierror" +) + +//#############// +//### Types ###// +//#############// + +// CloseFunc defines the general close function. +type CloseFunc func() error + +//#################// +//### Interface ###// +//#################// + +// A Closer is a thread-safe helper for common close actions. +type Closer interface { + // Close closes this closer in a thread-safe manner. + // + // Implements the io.Closer interface. + // + // This method always returns the close error, + // regardless of how often it gets called. + // + // The closing order looks like this: + // 1: the closing chan is closed. + // 2: the OnClosing funcs are executed. + // 3: each of the closer's children is closed. + // 4: it waits for the wait group. + // 5: the OnClose funcs are executed. + // 6: the closed chan is closed. + // 7: the parent is closed, if it has one. + // + // Close blocks, until all steps of the closing order + // have been done. + // No matter which goroutine called this method. + // Returns a hashicorp multierror. + Close() error + + // Close_ is a convenience version of Close(), for use in defer + // where the error is not of interest. + Close_() + + // CloseAndDone performs the same operation as Close(), but decrements + // the closer's wait group by one beforehand. 
+ // Attention: Calling this without first adding to the WaitGroup by + // calling AddWaitGroup() results in a panic. + CloseAndDone() error + + // CloseAndDone_ is a convenience version of CloseAndDone(), for use in + // defer where the error is not of interest. + CloseAndDone_() + + // ClosedChan returns a channel, which is closed as + // soon as the closer is completely closed. + // See Close() for the position in the closing order. + ClosedChan() <-chan struct{} + + // CloserAddWait adds the given delta to the closer's + // wait group. Useful to wait for routines associated + // with this closer to gracefully shutdown. + // See Close() for the position in the closing order. + CloserAddWait(delta int) + + // CloserDone decrements the closer's wait group by one. + // Attention: Calling this without first adding to the WaitGroup by + // calling AddWaitGroup() results in a panic. + CloserDone() + + // CloserOneWay creates a new child closer that has a one-way relationship + // with the current closer. This means that the child is closed whenever + // the parent closes, but not vice versa. + // See Close() for the position in the closing order. + CloserOneWay() Closer + + // CloserTwoWay creates a new child closer that has a two-way relationship + // with the current closer. This means that the child is closed whenever + // the parent closes and vice versa. + // See Close() for the position in the closing order. + CloserTwoWay() Closer + + // ClosingChan returns a channel, which is closed as + // soon as the closer is about to close. + // Remains closed, once ClosedChan() has also been closed. + // See Close() for the position in the closing order. + ClosingChan() <-chan struct{} + + // Context returns a context.Context, which is cancelled + // as soon as the closer is closing. + // The returned cancel func should be called as soon as the + // context is no longer needed, to free resources. 
+ Context() (context.Context, context.CancelFunc) + + // IsClosed returns a boolean indicating + // whether this instance has been closed completely. + IsClosed() bool + + // IsClosing returns a boolean indicating + // whether this instance is about to close. + // Also returns true, if IsClosed() returns true. + IsClosing() bool + + // OnClose adds the given CloseFuncs to the closer. + // Their errors are appended to the Close() multi error. + // Close functions are called in LIFO order. + // See Close() for their position in the closing order. + OnClose(f ...CloseFunc) + + // OnClosing adds the given CloseFuncs to the closer. + // Their errors are appended to the Close() multi error. + // Closing functions are called in LIFO order. + // It is guaranteed that all closing funcs are executed before + // any close funcs. + // See Close() for their position in the closing order. + OnClosing(f ...CloseFunc) +} + +//######################// +//### Implementation ###// +//######################// + +const ( + minChildrenCap = 100 +) + +// The closer type is this package's implementation of the Closer interface. +type closer struct { + // An unbuffered channel that expresses whether the + // closer is about to close. + // The channel itself gets closed to represent the closing + // of the closer, which leads to reads off of it to succeed. + closingChan chan struct{} + // An unbuffered channel that expresses whether the + // closer has been completely closed. + // The channel itself gets closed to represent the closing + // of the closer, which leads to reads off of it to succeed. + closedChan chan struct{} + // The error collected by executing the Close() func + // and combining all encountered errors from the close funcs. + closeErr error + + // Synchronises the access to the following properties. + mx sync.Mutex + // The close funcs that are executed when this closer closes. + closeFuncs []CloseFunc + // The closing funcs that are executed when this closer closes. 
+ closingFuncs []CloseFunc + // The parent of this closer. May be nil. + parent *closer + // The closer children that this closer spawned. + children []*closer + // Used to wait for external dependencies of the closer + // before the Close() method actually returns. + wg sync.WaitGroup + + // A flag that indicates whether this closer is a two-way closer. + // In comparison to a standard one-way closer, which closes when + // its parent closes, a two-way closer closes also its parent, when + // it itself gets closed. + twoWay bool + + // The index of this closer in its parent's children slice. + // Needed to efficiently remove the closer from its parent. + parentIndex int +} + +// New creates a new closer. +func New() Closer { + return newCloser() +} + +// Implements the Closer interface. +func (c *closer) Close() error { + // Mutex is not unlocked on defer! Therefore, be cautious when adding + // new control flow statements like return. + c.mx.Lock() + + // If the closer is already closing, just return the error. + if c.IsClosing() { + c.mx.Unlock() + return c.closeErr + } + + // Close the closing channel to signal that this closer is about to close now. + close(c.closingChan) + + // Execute all closing funcs of this closer. + c.closeErr = c.execCloseFuncs(c.closingFuncs) + // Delete them, to free resources. + c.closingFuncs = nil + + // Close all children. + for _, child := range c.children { + child.Close_() + } + + // Wait, until all dependencies of this closer have closed. + c.wg.Wait() + + // Execute all close funcs of this closer. + c.closeErr = c.execCloseFuncs(c.closeFuncs) + // Delete them, to free resources. + c.closeFuncs = nil + + // Close the closed channel to signal that this closer is closed now. + close(c.closedChan) + + c.mx.Unlock() + + // Close the parent now as well, if this is a two way closer. + // Otherwise, the closer must remove its reference from its parent's children + // to prevent a leak. 
+ // Only perform these actions, if the parent is not closing already! + if c.parent != nil && !c.parent.IsClosing() { + if c.twoWay { + c.parent.Close_() + } else { + c.parent.removeChild(c) + } + } + + return c.closeErr +} + +// Implements the Closer interface. +func (c *closer) Close_() { + _ = c.Close() +} + +// Implements the Closer interface. +func (c *closer) CloseAndDone() error { + c.wg.Done() + return c.Close() +} + +// Implements the Closer interface. +func (c *closer) CloseAndDone_() { + _ = c.CloseAndDone() +} + +// Implements the Closer interface. +func (c *closer) ClosedChan() <-chan struct{} { + return c.closedChan +} + +// Implements the Closer interface. +func (c *closer) CloserAddWait(delta int) { + c.wg.Add(delta) +} + +// Implements the Closer interface. +func (c *closer) CloserDone() { + c.wg.Done() +} + +// Implements the Closer interface. +func (c *closer) CloserOneWay() Closer { + return c.addChild(false) +} + +// Implements the Closer interface. +func (c *closer) CloserTwoWay() Closer { + return c.addChild(true) +} + +// Implements the Closer interface. +func (c *closer) ClosingChan() <-chan struct{} { + return c.closingChan +} + +// Implements the Closer interface. +func (c *closer) Context() (context.Context, context.CancelFunc) { + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + select { + case <-c.closingChan: + cancel() + case <-ctx.Done(): + } + }() + + return ctx, cancel +} + +// Implements the Closer interface. +func (c *closer) IsClosed() bool { + select { + case <-c.closedChan: + return true + default: + return false + } +} + +// Implements the Closer interface. +func (c *closer) IsClosing() bool { + select { + case <-c.closingChan: + return true + default: + return false + } +} + +// Implements the Closer interface. +func (c *closer) OnClose(f ...CloseFunc) { + c.mx.Lock() + c.closeFuncs = append(c.closeFuncs, f...) + c.mx.Unlock() +} + +// Implements the Closer interface. 
+func (c *closer) OnClosing(f ...CloseFunc) { + c.mx.Lock() + c.closingFuncs = append(c.closingFuncs, f...) + c.mx.Unlock() +} + +//###############// +//### Private ###// +//###############// + +// newCloser creates a new closer with the given close funcs. +func newCloser() *closer { + return &closer{ + closingChan: make(chan struct{}), + closedChan: make(chan struct{}), + } +} + +// addChild creates a new closer and adds it as either +// a one-way or two-way child to this closer. +func (c *closer) addChild(twoWay bool) *closer { + // Create a new closer and set the current closer as its parent. + // Also set the twoWay flag. + child := newCloser() + child.parent = c + child.twoWay = twoWay + + // Add the closer to the current closer's children. + c.mx.Lock() + child.parentIndex = len(c.children) + c.children = append(c.children, child) + c.mx.Unlock() + + return child +} + +// removeChild removes the given child from this closer's children. +// If the child can not be found, this is a no-op. +func (c *closer) removeChild(child *closer) { + c.mx.Lock() + defer c.mx.Unlock() + + last := len(c.children) - 1 + c.children[last].parentIndex = child.parentIndex + c.children[child.parentIndex] = c.children[last] + c.children[last] = nil + c.children = c.children[:last] + + // Prevent endless growth. + // If the capacity is bigger than our min value and + // four times larger than the length, shrink it by half. + cp := cap(c.children) + le := len(c.children) + if cp > minChildrenCap && cp > 4*le { + children := make([]*closer, le, le*2) + copy(children, c.children) + c.children = children + } +} + +// execCloseFuncs executes the given close funcs and appends them +// to the closer's closeErr, which is a hashicorp.multiError. +// The error is then returned. +func (c *closer) execCloseFuncs(f []CloseFunc) error { + // Batch errors together. + var mErr *multierror.Error + + // If an error is already set, append the next errors to it. 
+ if c.closeErr != nil { + mErr = multierror.Append(mErr, c.closeErr) + } + + // Call in LIFO order. Append the errors. + for i := len(f) - 1; i >= 0; i-- { + if err := f[i](); err != nil { + mErr = multierror.Append(mErr, err) + } + } + + // If no error is available, return. + if mErr == nil { + return nil + } + + // The default multiCloser error formatting uses too much space. + mErr.ErrorFormat = func(errors []error) string { + str := fmt.Sprintf("%v close errors occurred:", len(errors)) + for _, err := range errors { + str += "\n- " + err.Error() + } + return str + } + + return mErr +} diff --git a/src/control/vendor/github.com/desertbit/columnize/.travis.yml b/src/control/vendor/github.com/desertbit/columnize/.travis.yml new file mode 100644 index 00000000000..1a0bbea6c77 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/columnize/.travis.yml @@ -0,0 +1,3 @@ +language: go +go: + - tip diff --git a/src/control/vendor/github.com/desertbit/columnize/COPYING b/src/control/vendor/github.com/desertbit/columnize/COPYING new file mode 100644 index 00000000000..86f4501489f --- /dev/null +++ b/src/control/vendor/github.com/desertbit/columnize/COPYING @@ -0,0 +1,20 @@ +MIT LICENSE + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/control/vendor/github.com/desertbit/columnize/README.md b/src/control/vendor/github.com/desertbit/columnize/README.md new file mode 100644 index 00000000000..6852911fcc9 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/columnize/README.md @@ -0,0 +1,75 @@ +Columnize +========= + +Easy column-formatted output for golang + +[![Build Status](https://travis-ci.org/ryanuber/columnize.svg)](https://travis-ci.org/ryanuber/columnize) + +Columnize is a really small Go package that makes building CLI's a little bit +easier. In some CLI designs, you want to output a number similar items in a +human-readable way with nicely aligned columns. However, figuring out how wide +to make each column is a boring problem to solve and eats your valuable time. + +Here is an example: + +```go +package main + +import ( + "fmt" + "github.com/ryanuber/columnize" +) + +func main() { + output := []string{ + "Name | Gender | Age", + "Bob | Male | 38", + "Sally | Female | 26", + } + result := columnize.SimpleFormat(output) + fmt.Println(result) +} +``` + +As you can see, you just pass in a list of strings. And the result: + +``` +Name Gender Age +Bob Male 38 +Sally Female 26 +``` + +Columnize is tolerant of missing or empty fields, or even empty lines, so +passing in extra lines for spacing should show up as you would expect. + +Configuration +============= + +Columnize is configured using a `Config`, which can be obtained by calling the +`DefaultConfig()` method. 
You can then tweak the settings in the resulting +`Config`: + +``` +config := columnize.DefaultConfig() +config.Delim = "|" +config.Glue = " " +config.Prefix = "" +config.Empty = "" +``` + +* `Delim` is the string by which columns of **input** are delimited +* `Glue` is the string by which columns of **output** are delimited +* `Prefix` is a string by which each line of **output** is prefixed +* `Empty` is a string used to replace blank values found in output + +You can then pass the `Config` in using the `Format` method (signature below) to +have text formatted to your liking. + +Usage +===== + +```go +SimpleFormat(input []string) string + +Format(input []string, config *Config) string +``` diff --git a/src/control/vendor/github.com/desertbit/columnize/columnize.go b/src/control/vendor/github.com/desertbit/columnize/columnize.go new file mode 100644 index 00000000000..d87785940c6 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/columnize/columnize.go @@ -0,0 +1,134 @@ +package columnize + +import ( + "fmt" + "strings" +) + +type Config struct { + // The string by which the lines of input will be split. + Delim string + + // The string by which columns of output will be separated. + Glue string + + // The string by which columns of output will be prefixed. + Prefix string + + // A replacement string to replace empty fields + Empty string +} + +// Returns a Config with default values. +func DefaultConfig() *Config { + return &Config{ + Delim: "|", + Glue: " ", + Prefix: "", + } +} + +// Returns a list of elements, each representing a single item which will +// belong to a column of output.
+func getElementsFromLine(config *Config, line string) []interface{} { + elements := make([]interface{}, 0) + for _, field := range strings.Split(line, config.Delim) { + value := strings.TrimSpace(field) + if value == "" && config.Empty != "" { + value = config.Empty + } + elements = append(elements, value) + } + return elements +} + +// Examines a list of strings and determines how wide each column should be +// considering all of the elements that need to be printed within it. +func getWidthsFromLines(config *Config, lines []string) []int { + var widths []int + + for _, line := range lines { + elems := getElementsFromLine(config, line) + for i := 0; i < len(elems); i++ { + l := len(elems[i].(string)) + if len(widths) <= i { + widths = append(widths, l) + } else if widths[i] < l { + widths[i] = l + } + } + } + return widths +} + +// Given a set of column widths and the number of columns in the current line, +// returns a sprintf-style format string which can be used to print output +// aligned properly with other lines using the same widths set. +func (c *Config) getStringFormat(widths []int, columns int) string { + // Start with the prefix, if any was given. + stringfmt := c.Prefix + + // Create the format string from the discovered widths + for i := 0; i < columns && i < len(widths); i++ { + if i == columns-1 { + stringfmt += "%s\n" + } else { + stringfmt += fmt.Sprintf("%%-%ds%s", widths[i], c.Glue) + } + } + return stringfmt +} + +// MergeConfig merges two config objects together and returns the resulting +// configuration. Values from the right take precedence over the left side. 
+func MergeConfig(a, b *Config) *Config { + var result Config = *a + + // Return quickly if either side was nil + if a == nil || b == nil { + return &result + } + + if b.Delim != "" { + result.Delim = b.Delim + } + if b.Glue != "" { + result.Glue = b.Glue + } + if b.Prefix != "" { + result.Prefix = b.Prefix + } + if b.Empty != "" { + result.Empty = b.Empty + } + + return &result +} + +// Format is the public-facing interface that takes either a plain string +// or a list of strings and returns nicely aligned output. +func Format(lines []string, config *Config) string { + var result string + + conf := MergeConfig(DefaultConfig(), config) + widths := getWidthsFromLines(conf, lines) + + // Create the formatted output using the format string + for _, line := range lines { + elems := getElementsFromLine(conf, line) + stringfmt := conf.getStringFormat(widths, len(elems)) + result += fmt.Sprintf(stringfmt, elems...) + } + + // Remove trailing newline without removing leading/trailing space + if n := len(result); n > 0 && result[n-1] == '\n' { + result = result[:n-1] + } + + return result +} + +// Convenience function for using Columnize as easy as possible. 
+func SimpleFormat(lines []string) string { + return Format(lines, nil) +} diff --git a/src/control/vendor/github.com/desertbit/go-shlex/.gitignore b/src/control/vendor/github.com/desertbit/go-shlex/.gitignore new file mode 100644 index 00000000000..c32a10a524b --- /dev/null +++ b/src/control/vendor/github.com/desertbit/go-shlex/.gitignore @@ -0,0 +1,3 @@ +shlex.test +.idea +.vscode diff --git a/src/control/vendor/github.com/desertbit/go-shlex/LICENSE b/src/control/vendor/github.com/desertbit/go-shlex/LICENSE new file mode 100644 index 00000000000..4a17268ac0d --- /dev/null +++ b/src/control/vendor/github.com/desertbit/go-shlex/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) anmitsu + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/control/vendor/github.com/desertbit/go-shlex/README.md b/src/control/vendor/github.com/desertbit/go-shlex/README.md new file mode 100644 index 00000000000..33c02efff9c --- /dev/null +++ b/src/control/vendor/github.com/desertbit/go-shlex/README.md @@ -0,0 +1,38 @@ +# go-shlex + +go-shlex is a library to make a lexical analyzer like Unix shell for +Go. + +## Install + +go get -u "github.com/desertbit/go-shlex" + +## Usage + +```go +package main + +import ( + "fmt" + "log" + + "github.com/desertbit/go-shlex" +) + +func main() { + cmd := `cp -Rdp "file name" 'file name2' dir\ name` + words, err := shlex.Split(cmd, true) + if err != nil { + log.Fatal(err) + } + + for _, w := range words { + fmt.Println(w) + } +} +``` + +## Documentation + +http://godoc.org/github.com/desertbit/go-shlex + diff --git a/src/control/vendor/github.com/desertbit/go-shlex/shlex.go b/src/control/vendor/github.com/desertbit/go-shlex/shlex.go new file mode 100644 index 00000000000..30bea86c87e --- /dev/null +++ b/src/control/vendor/github.com/desertbit/go-shlex/shlex.go @@ -0,0 +1,195 @@ +// Package shlex provides a simple lexical analysis like Unix shell. +package shlex + +import ( + "bufio" + "errors" + "io" + "strings" + "unicode" +) + +var ( + ErrNoClosing = errors.New("no closing quotation") + ErrNoEscaped = errors.New("no escaped character") +) + +// Tokenizer is the interface that classifies a token according to +// words, whitespaces, quotations, escapes and escaped quotations. +type Tokenizer interface { + IsWord(rune) bool + IsWhitespace(rune) bool + IsQuote(rune) bool + IsEscape(rune) bool + IsEscapedQuote(rune) bool +} + +// DefaultTokenizer implements a simple tokenizer like Unix shell. 
+type DefaultTokenizer struct{} + +func (t *DefaultTokenizer) IsWord(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r) +} +func (t *DefaultTokenizer) IsQuote(r rune) bool { + switch r { + case '\'', '"': + return true + default: + return false + } +} +func (t *DefaultTokenizer) IsWhitespace(r rune) bool { + return unicode.IsSpace(r) +} +func (t *DefaultTokenizer) IsEscape(r rune) bool { + return r == '\\' +} +func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool { + return r == '"' +} + +// Lexer represents a lexical analyzer. +type Lexer struct { + reader *bufio.Reader + tokenizer Tokenizer + posix bool + whitespaceSplit bool +} + +// NewLexer creates a new Lexer reading from io.Reader. This Lexer +// has a DefaultTokenizer according to posix and whitespaceSplit +// rules. +func NewLexer(r io.Reader, posix, whitespaceSplit bool) *Lexer { + return &Lexer{ + reader: bufio.NewReader(r), + tokenizer: &DefaultTokenizer{}, + posix: posix, + whitespaceSplit: whitespaceSplit, + } +} + +// NewLexerString creates a new Lexer reading from a string. This +// Lexer has a DefaultTokenizer according to posix and whitespaceSplit +// rules. +func NewLexerString(s string, posix, whitespaceSplit bool) *Lexer { + return NewLexer(strings.NewReader(s), posix, whitespaceSplit) +} + +// Split splits a string according to posix or non-posix rules. +func Split(s string, posix bool) ([]string, error) { + return NewLexerString(s, posix, true).Split() +} + +// SetTokenizer sets a Tokenizer. 
+func (l *Lexer) SetTokenizer(t Tokenizer) { + l.tokenizer = t +} + +func (l *Lexer) Split() ([]string, error) { + result := make([]string, 0) + for { + token, err := l.readToken() + if token != nil { + result = append(result, string(token)) + } + + if err == io.EOF { + break + } else if err != nil { + return result, err + } + } + return result, nil +} + +func (l *Lexer) readToken() (token []rune, err error) { + t := l.tokenizer + quoted := false + state := ' ' + escapedState := ' ' +scanning: + for { + next, _, err := l.reader.ReadRune() + if err != nil { + if t.IsQuote(state) { + return token, ErrNoClosing + } else if t.IsEscape(state) { + return token, ErrNoEscaped + } + return token, err + } + + switch { + case t.IsWhitespace(state): + switch { + case t.IsWhitespace(next): + break scanning + case l.posix && t.IsEscape(next): + escapedState = 'a' + state = next + case t.IsWord(next): + token = append(token, next) + state = 'a' + case t.IsQuote(next): + if !l.posix { + token = append(token, next) + } + state = next + default: + token = []rune{next} + if l.whitespaceSplit { + state = 'a' + } else if token != nil || (l.posix && quoted) { + break scanning + } + } + case t.IsQuote(state): + quoted = true + switch { + case next == state: + if !l.posix { + token = append(token, next) + break scanning + } else { + if token == nil { + token = []rune{} + } + state = 'a' + } + case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state): + escapedState = state + state = next + default: + token = append(token, next) + } + case t.IsEscape(state): + if t.IsQuote(escapedState) && next != state && next != escapedState { + token = append(token, state) + } + token = append(token, next) + state = escapedState + case t.IsWord(state): + switch { + case t.IsWhitespace(next): + if token != nil || (l.posix && quoted) { + break scanning + } + case l.posix && t.IsQuote(next): + state = next + case l.posix && t.IsEscape(next): + escapedState = 'a' + state = next + case t.IsWord(next) || 
t.IsQuote(next): + token = append(token, next) + default: + if l.whitespaceSplit { + token = append(token, next) + } else if token != nil { + l.reader.UnreadRune() + break scanning + } + } + } + } + return token, nil +} diff --git a/src/control/vendor/github.com/desertbit/grumble/.gitignore b/src/control/vendor/github.com/desertbit/grumble/.gitignore new file mode 100644 index 00000000000..20f2ad4aa40 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/.gitignore @@ -0,0 +1,5 @@ +*~ +/sample/full/full +/sample/simple/simple +.idea +.vscode diff --git a/src/control/vendor/github.com/desertbit/grumble/AUTHORS b/src/control/vendor/github.com/desertbit/grumble/AUTHORS new file mode 100644 index 00000000000..0b768ebddda --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/AUTHORS @@ -0,0 +1 @@ +Roland Singer diff --git a/src/control/vendor/github.com/desertbit/grumble/LICENSE b/src/control/vendor/github.com/desertbit/grumble/LICENSE new file mode 100644 index 00000000000..2b0bbdb7576 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/control/vendor/github.com/desertbit/grumble/README.md b/src/control/vendor/github.com/desertbit/grumble/README.md new file mode 100644 index 00000000000..77c4ac69900 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/README.md @@ -0,0 +1,101 @@ +# Grumble - A powerful modern CLI and SHELL + +[![GoDoc](https://godoc.org/github.com/desertbit/grumble?status.svg)](https://godoc.org/github.com/desertbit/grumble) +[![Go Report Card](https://goreportcard.com/badge/github.com/desertbit/grumble)](https://goreportcard.com/report/github.com/desertbit/grumble) + +There are a handful of powerful go CLI libraries available ([spf13/cobra](https://github.com/spf13/cobra), [urfave/cli](https://github.com/urfave/cli)). +However sometimes an integrated shell interface is a great and useful extension for the actual application. +This library offers a simple API to create powerful CLI applications and automatically starts +an **integrated interactive shell**, if the application is started without any command arguments. + +**Hint:** We do not guarantee 100% backwards compatibility between minor versions (1.x). However, the API is mostly stable and should not change much. + +[![asciicast](https://asciinema.org/a/155332.png)](https://asciinema.org/a/155332?t=5) + +## Introduction + +Create a grumble APP. + +```go +var app = grumble.New(&grumble.Config{ + Name: "app", + Description: "short app description", + + Flags: func(f *grumble.Flags) { + f.String("d", "directory", "DEFAULT", "set an alternative directory path") + f.Bool("v", "verbose", false, "enable verbose mode") + }, +}) +``` + +Register a top-level command.
*Note: Sub commands are also supported...* + +```go +app.AddCommand(&grumble.Command{ + Name: "daemon", + Help: "run the daemon", + Aliases: []string{"run"}, + + Flags: func(f *grumble.Flags) { + f.Duration("t", "timeout", time.Second, "timeout duration") + }, + + Args: func(a *grumble.Args) { + a.String("service", "which service to start", grumble.Default("server")) + }, + + Run: func(c *grumble.Context) error { + // Parent Flags. + c.App.Println("directory:", c.Flags.String("directory")) + c.App.Println("verbose:", c.Flags.Bool("verbose")) + // Flags. + c.App.Println("timeout:", c.Flags.Duration("timeout")) + // Args. + c.App.Println("service:", c.Args.String("service")) + return nil + }, +}) +``` + +Run the application. + +```go +err := app.Run() +``` + +Or use the builtin *grumble.Main* function to handle errors automatically. + +```go +func main() { + grumble.Main(app) +} +``` + +## Shell Multiline Input + +Builtin support for multiple lines. + +``` +>>> This is \ +... a multi line \ +... command +``` + +## Samples + +Check out the [sample directory](/sample) for some detailed examples. + +The [grml project](https://github.com/desertbit/grml) uses grumble. + +## Additional Useful Packages + +- https://github.com/AlecAivazis/survey +- https://github.com/tj/go-spin + +## Credits + +This project is based on ideas from the great [ishell](https://github.com/abiosoft/ishell) library. 
+ +## License + +MIT diff --git a/src/control/vendor/github.com/desertbit/grumble/app.go b/src/control/vendor/github.com/desertbit/grumble/app.go new file mode 100644 index 00000000000..ffb5155b9f0 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/app.go @@ -0,0 +1,483 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" + "io" + "os" + "strings" + + "github.com/desertbit/closer/v3" + shlex "github.com/desertbit/go-shlex" + "github.com/desertbit/readline" + "github.com/fatih/color" +) + +// App is the entrypoint. 
+type App struct { + closer.Closer + + rl *readline.Instance + config *Config + commands Commands + isShell bool + currentPrompt string + + flags Flags + flagMap FlagMap + + args Args + + initHook func(a *App, flags FlagMap) error + shellHook func(a *App) error + + printHelp func(a *App, shell bool) + printCommandHelp func(a *App, cmd *Command, shell bool) + interruptHandler func(a *App, count int) + printASCIILogo func(a *App) +} + +// New creates a new app. +// Panics if the config is invalid. +func New(c *Config) (a *App) { + // Prepare the config. + c.SetDefaults() + err := c.Validate() + if err != nil { + panic(err) + } + + // APP. + a = &App{ + Closer: closer.New(), + config: c, + currentPrompt: c.prompt(), + flagMap: make(FlagMap), + printHelp: defaultPrintHelp, + printCommandHelp: defaultPrintCommandHelp, + interruptHandler: defaultInterruptHandler, + } + + // Register the builtin flags. + a.flags.Bool("h", "help", false, "display help") + a.flags.BoolL("nocolor", false, "disable color output") + + // Register the user flags, if present. + if c.Flags != nil { + c.Flags(&a.flags) + } + + return +} + +// SetPrompt sets a new prompt. +func (a *App) SetPrompt(p string) { + if !a.config.NoColor { + p = a.config.PromptColor.Sprint(p) + } + a.currentPrompt = p +} + +// SetDefaultPrompt resets the current prompt to the default prompt as +// configured in the config. +func (a *App) SetDefaultPrompt() { + a.currentPrompt = a.config.prompt() +} + +// IsShell indicates, if this is a shell session. +func (a *App) IsShell() bool { + return a.isShell +} + +// Config returns the app's config value. +func (a *App) Config() *Config { + return a.config +} + +// Commands returns the app's commands. +// Access is not thread-safe. Only access during command execution. +func (a *App) Commands() *Commands { + return &a.commands +} + +// PrintError prints the given error. 
+func (a *App) PrintError(err error) { + if a.config.NoColor { + a.Printf("error: %v\n", err) + } else { + a.config.ErrorColor.Fprint(a, "error: ") + a.Printf("%v\n", err) + } +} + +// Print writes to terminal output. +// Print writes to standard output if terminal output is not yet active. +func (a *App) Print(args ...interface{}) (int, error) { + return fmt.Fprint(a, args...) +} + +// Printf formats according to a format specifier and writes to terminal output. +// Printf writes to standard output if terminal output is not yet active. +func (a *App) Printf(format string, args ...interface{}) (int, error) { + return fmt.Fprintf(a, format, args...) +} + +// Println writes to terminal output followed by a newline. +// Println writes to standard output if terminal output is not yet active. +func (a *App) Println(args ...interface{}) (int, error) { + return fmt.Fprintln(a, args...) +} + +// OnInit sets the function which will be executed before the first command +// is executed. App flags can be handled here. +func (a *App) OnInit(f func(a *App, flags FlagMap) error) { + a.initHook = f +} + +// OnShell sets the function which will be executed before the shell starts. +func (a *App) OnShell(f func(a *App) error) { + a.shellHook = f +} + +// SetInterruptHandler sets the interrupt handler function. +func (a *App) SetInterruptHandler(f func(a *App, count int)) { + a.interruptHandler = f +} + +// SetPrintHelp sets the print help function. +func (a *App) SetPrintHelp(f func(a *App, shell bool)) { + a.printHelp = f +} + +// SetPrintCommandHelp sets the print help function for a single command. +func (a *App) SetPrintCommandHelp(f func(a *App, c *Command, shell bool)) { + a.printCommandHelp = f +} + +// SetPrintASCIILogo sets the function to print the ASCII logo. 
+func (a *App) SetPrintASCIILogo(f func(a *App)) { + a.printASCIILogo = func(a *App) { + if !a.config.NoColor { + a.config.ASCIILogoColor.Set() + defer color.Unset() + } + f(a) + } +} + +// Write to the underlying output, using readline if available. +func (a *App) Write(p []byte) (int, error) { + return a.Stdout().Write(p) +} + +// Stdout returns a writer to Stdout, using readline if available. +// Note that calling before Run() will return a different instance. +func (a *App) Stdout() io.Writer { + if a.rl != nil { + return a.rl.Stdout() + } + return os.Stdout +} + +// Stderr returns a writer to Stderr, using readline if available. +// Note that calling before Run() will return a different instance. +func (a *App) Stderr() io.Writer { + if a.rl != nil { + return a.rl.Stderr() + } + return os.Stderr +} + +// AddCommand adds a new command. +// Panics on error. +func (a *App) AddCommand(cmd *Command) { + a.addCommand(cmd, true) +} + +// addCommand adds a new command. +// If addHelpFlag is true, a help flag is automatically +// added to the command which displays its usage on use. +// Panics on error. +func (a *App) addCommand(cmd *Command, addHelpFlag bool) { + err := cmd.validate() + if err != nil { + panic(err) + } + cmd.registerFlagsAndArgs(addHelpFlag) + + a.commands.Add(cmd) +} + +// RunCommand runs a single command. +func (a *App) RunCommand(args []string) error { + // Parse the arguments string and obtain the command path to the root, + // and the command flags. + cmds, fg, args, err := a.commands.parse(args, a.flagMap, false) + if err != nil { + return err + } else if len(cmds) == 0 { + return fmt.Errorf("unknown command, try 'help'") + } + + // The last command is the final command. + cmd := cmds[len(cmds)-1] + + // Print the command help if the command run function is nil or if the help flag is set. + if fg.Bool("help") || cmd.Run == nil { + a.printCommandHelp(a, cmd, a.isShell) + return nil + } + + // Parse the arguments. 
+ cmdArgMap := make(ArgMap) + args, err = cmd.args.parse(args, cmdArgMap) + if err != nil { + return err + } + + // Check, if values from the argument string are not consumed (and therefore invalid). + if len(args) > 0 { + return fmt.Errorf("invalid usage of command '%s' (unconsumed input '%s'), try 'help'", cmd.Name, strings.Join(args, " ")) + } + + // Create the context and pass the rest args. + ctx := newContext(a, cmd, fg, cmdArgMap) + + // Run the command. + err = cmd.Run(ctx) + if err != nil { + return err + } + + return nil +} + +// Run the application and parse the command line arguments. +// This method blocks. +func (a *App) Run() (err error) { + defer a.Close() + + // Sort all commands by their name. + a.commands.SortRecursive() + + // Remove the program name from the args. + args := os.Args + if len(args) > 0 { + args = args[1:] + } + + // Parse the app command line flags. + args, err = a.flags.parse(args, a.flagMap) + if err != nil { + return err + } + + // Check if nocolor was set. + a.config.NoColor = a.flagMap.Bool("nocolor") + + // Determine if this is a shell session. + a.isShell = len(args) == 0 + + // Add general builtin commands. + a.addCommand(&Command{ + Name: "help", + Help: "use 'help [command]' for command help", + Args: func(a *Args) { + a.StringList("command", "the name of the command") + }, + Run: func(c *Context) error { + args := c.Args.StringList("command") + if len(args) == 0 { + a.printHelp(a, a.isShell) + return nil + } + cmd, _, err := a.commands.FindCommand(args) + if err != nil { + return err + } else if cmd == nil { + a.PrintError(fmt.Errorf("command not found")) + return nil + } + a.printCommandHelp(a, cmd, a.isShell) + return nil + }, + isBuiltin: true, + }, false) + + // Check if help should be displayed. + if a.flagMap.Bool("help") { + a.printHelp(a, false) + return nil + } + + // Add shell builtin commands. + // Ensure to add all commands before running the init hook. 
+ // If the init hook does something with the app commands, then these should also be included. + if a.isShell { + a.AddCommand(&Command{ + Name: "exit", + Help: "exit the shell", + Run: func(c *Context) error { + c.Stop() + return nil + }, + isBuiltin: true, + }) + a.AddCommand(&Command{ + Name: "clear", + Help: "clear the screen", + Run: func(c *Context) error { + readline.ClearScreen(a.rl) + return nil + }, + isBuiltin: true, + }) + } + + // Run the init hook. + if a.initHook != nil { + err = a.initHook(a, a.flagMap) + if err != nil { + return err + } + } + + // Check if a command chould be executed in non-interactive mode. + if !a.isShell { + return a.RunCommand(args) + } + + // Create the readline instance. + a.rl, err = readline.NewEx(&readline.Config{ + Prompt: a.currentPrompt, + HistorySearchFold: true, // enable case-insensitive history searching + DisableAutoSaveHistory: true, + HistoryFile: a.config.HistoryFile, + HistoryLimit: a.config.HistoryLimit, + AutoComplete: newCompleter(&a.commands), + }) + if err != nil { + return err + } + a.OnClose(a.rl.Close) + + // Run the shell hook. + if a.shellHook != nil { + err = a.shellHook(a) + if err != nil { + return err + } + } + + // Print the ASCII logo. + if a.printASCIILogo != nil { + a.printASCIILogo(a) + } + + // Run the shell. + return a.runShell() +} + +func (a *App) runShell() error { + var interruptCount int + var lines []string + multiActive := false + +Loop: + for !a.IsClosing() { + // Set the prompt. + if multiActive { + a.rl.SetPrompt(a.config.multiPrompt()) + } else { + a.rl.SetPrompt(a.currentPrompt) + } + multiActive = false + + // Readline. + line, err := a.rl.Readline() + if err != nil { + if err == readline.ErrInterrupt { + interruptCount++ + a.interruptHandler(a, interruptCount) + continue Loop + } else if err == io.EOF { + return nil + } else { + return err + } + } + + // Reset the interrupt count. + interruptCount = 0 + + // Handle multiline input. 
+ if strings.HasSuffix(line, "\\") { + multiActive = true + line = strings.TrimSpace(line[:len(line)-1]) // Add without suffix and trim spaces. + lines = append(lines, line) + continue Loop + } + lines = append(lines, strings.TrimSpace(line)) + + line = strings.Join(lines, " ") + line = strings.TrimSpace(line) + lines = lines[:0] + + // Skip if the line is empty. + if len(line) == 0 { + continue Loop + } + + // Save command history. + err = a.rl.SaveHistory(line) + if err != nil { + a.PrintError(err) + continue Loop + } + + // Split the line to args. + args, err := shlex.Split(line, true) + if err != nil { + a.PrintError(fmt.Errorf("invalid args: %v", err)) + continue Loop + } + + // Execute the command. + err = a.RunCommand(args) + if err != nil { + a.PrintError(err) + // Do not continue the Loop here. We want to handle command changes below. + } + + // Sort the commands again if they have changed (Add or remove action). + if a.commands.hasChanged() { + a.commands.SortRecursive() + a.commands.unsetChanged() + } + } + + return nil +} diff --git a/src/control/vendor/github.com/desertbit/grumble/argmap.go b/src/control/vendor/github.com/desertbit/grumble/argmap.go new file mode 100644 index 00000000000..7a103f51715 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/argmap.go @@ -0,0 +1,288 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" + "time" +) + +// ArgMapItem holds the specific arg data. +type ArgMapItem struct { + Value interface{} + IsDefault bool +} + +// ArgMap holds all the parsed arg values. +type ArgMap map[string]*ArgMapItem + +// String returns the given arg value as string. +// Panics if not present. Args must be registered. +func (a ArgMap) String(name string) string { + i := a[name] + if i == nil { + panic(fmt.Errorf("missing argument value: arg '%s' not registered", name)) + } + s, ok := i.Value.(string) + if !ok { + panic(fmt.Errorf("failed to assert argument '%s' to string", name)) + } + return s +} + +// StringList returns the given arg value as string slice. +// Panics if not present. Args must be registered. +// If optional and not provided, nil is returned. +func (a ArgMap) StringList(long string) []string { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + s, ok := i.Value.([]string) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to string list", long)) + } + return s +} + +// Bool returns the given arg value as bool. +// Panics if not present. Args must be registered. 
+func (a ArgMap) Bool(long string) bool { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + b, ok := i.Value.(bool) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to bool", long)) + } + return b +} + +// BoolList returns the given arg value as bool slice. +// Panics if not present. Args must be registered. +func (a ArgMap) BoolList(long string) []bool { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + b, ok := i.Value.([]bool) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to bool list", long)) + } + return b +} + +// Int returns the given arg value as int. +// Panics if not present. Args must be registered. +func (a ArgMap) Int(long string) int { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(int) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to int", long)) + } + return v +} + +// IntList returns the given arg value as int slice. +// Panics if not present. Args must be registered. +func (a ArgMap) IntList(long string) []int { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]int) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to int list", long)) + } + return v +} + +// Int64 returns the given arg value as int64. +// Panics if not present. Args must be registered. +func (a ArgMap) Int64(long string) int64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(int64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to int64", long)) + } + return v +} + +// Int64List returns the given arg value as int64. +// Panics if not present. Args must be registered. 
+func (a ArgMap) Int64List(long string) []int64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]int64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to int64 list", long)) + } + return v +} + +// Uint returns the given arg value as uint. +// Panics if not present. Args must be registered. +func (a ArgMap) Uint(long string) uint { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(uint) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to uint", long)) + } + return v +} + +// UintList returns the given arg value as uint. +// Panics if not present. Args must be registered. +func (a ArgMap) UintList(long string) []uint { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]uint) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to uint list", long)) + } + return v +} + +// Uint64 returns the given arg value as uint64. +// Panics if not present. Args must be registered. +func (a ArgMap) Uint64(long string) uint64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(uint64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to uint64", long)) + } + return v +} + +// Uint64List returns the given arg value as uint64. +// Panics if not present. Args must be registered. +func (a ArgMap) Uint64List(long string) []uint64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]uint64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to uint64 list", long)) + } + return v +} + +// Float64 returns the given arg value as float64. 
+// Panics if not present. Args must be registered. +func (a ArgMap) Float64(long string) float64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(float64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to float64", long)) + } + return v +} + +// Float64List returns the given arg value as float64. +// Panics if not present. Args must be registered. +func (a ArgMap) Float64List(long string) []float64 { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]float64) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to float64 list", long)) + } + return v +} + +// Duration returns the given arg value as duration. +// Panics if not present. Args must be registered. +func (a ArgMap) Duration(long string) time.Duration { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + v, ok := i.Value.(time.Duration) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to duration", long)) + } + return v +} + +// DurationList returns the given arg value as duration. +// Panics if not present. Args must be registered. 
+func (a ArgMap) DurationList(long string) []time.Duration { + i := a[long] + if i == nil { + panic(fmt.Errorf("missing arg value: arg '%s' not registered", long)) + } + if i.Value == nil { + return nil + } + v, ok := i.Value.([]time.Duration) + if !ok { + panic(fmt.Errorf("failed to assert arg '%s' to duration list", long)) + } + return v +} diff --git a/src/control/vendor/github.com/desertbit/grumble/argopt.go b/src/control/vendor/github.com/desertbit/grumble/argopt.go new file mode 100644 index 00000000000..022c5ac19d0 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/argopt.go @@ -0,0 +1,71 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +// ArgOption can be supplied to modify an argument. 
+type ArgOption func(*argItem) + +// Min sets the minimum required number of elements for a list argument. +func Min(m int) ArgOption { + if m < 0 { + panic("min must be >= 0") + } + + return func(i *argItem) { + if !i.isList { + panic("min option only valid for list arguments") + } + + i.listMin = m + } +} + +// Max sets the maximum required number of elements for a list argument. +func Max(m int) ArgOption { + if m < 1 { + panic("max must be >= 1") + } + + return func(i *argItem) { + if !i.isList { + panic("max option only valid for list arguments") + } + + i.listMax = m + } +} + +// Default sets a default value for the argument. +// The argument becomes optional then. +func Default(v interface{}) ArgOption { + if v == nil { + panic("nil default value not allowed") + } + + return func(i *argItem) { + i.Default = v + i.optional = true + } +} diff --git a/src/control/vendor/github.com/desertbit/grumble/args.go b/src/control/vendor/github.com/desertbit/grumble/args.go new file mode 100644 index 00000000000..06320c66a95 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/args.go @@ -0,0 +1,446 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" + "strconv" + "time" +) + +// The parseArgFunc describes a func that parses from the given command line arguments +// the values for its argument and saves them to the ArgMap. +// It returns the not-consumed arguments and an error. +type parseArgFunc func(args []string, res ArgMap) ([]string, error) + +type argItem struct { + Name string + Help string + HelpArgs string + Default interface{} + + parser parseArgFunc + isList bool + optional bool + listMin int + listMax int +} + +// Args holds all the registered args. +type Args struct { + list []*argItem +} + +func (a *Args) register( + name, help, helpArgs string, + isList bool, + pf parseArgFunc, + opts ...ArgOption, +) { + // Validate. + if name == "" { + panic("empty argument name") + } else if help == "" { + panic(fmt.Errorf("missing help message for argument '%s'", name)) + } + + // Ensure the name is unique. + for _, ai := range a.list { + if ai.Name == name { + panic(fmt.Errorf("argument '%s' registered twice", name)) + } + } + + // Create the item. + item := &argItem{ + Name: name, + Help: help, + HelpArgs: helpArgs, + parser: pf, + isList: isList, + optional: isList, + listMin: -1, + listMax: -1, + } + + // Apply options. + // Afterwards, we can make some final checks. 
+ for _, opt := range opts { + opt(item) + } + + if item.isList && item.listMax > 0 && item.listMax < item.listMin { + panic("max must not be less than min for list arguments") + } + + if !a.empty() { + last := a.list[len(a.list)-1] + + // Check, if a list argument has been supplied already. + if last.isList { + panic("list argument has been registered, nothing can come after it") + } + + // Check, that after an optional argument no mandatory one follows. + if !item.optional && last.optional { + panic("mandatory argument not allowed after optional one") + } + } + + a.list = append(a.list, item) +} + +// empty returns true, if the args are empty. +func (a *Args) empty() bool { + return len(a.list) == 0 +} + +func (a *Args) parse(args []string, res ArgMap) ([]string, error) { + // Iterate over all arguments that have been registered. + // There must be either a default value or a value available, + // otherwise the argument is missing. + var err error + for _, item := range a.list { + // If it is a list argument, it will consume the rest of the input. + // Check that it matches its range. + if item.isList { + if len(args) < item.listMin { + return nil, fmt.Errorf("argument '%s' requires at least %d element(s)", item.Name, item.listMin) + } + if item.listMax > 0 && len(args) > item.listMax { + return nil, fmt.Errorf("argument '%s' requires at most %d element(s)", item.Name, item.listMax) + } + } + + // If no arguments are left, simply set the default values. + if len(args) == 0 { + // Check, if the argument is mandatory. + if !item.optional { + return nil, fmt.Errorf("missing argument '%s'", item.Name) + } + + // Register its default value. + res[item.Name] = &ArgMapItem{Value: item.Default, IsDefault: true} + continue + } + + args, err = item.parser(args, res) + if err != nil { + return nil, err + } + } + + return args, nil +} + +// String registers a string argument. 
+func (a *Args) String(name, help string, opts ...ArgOption) { + a.register(name, help, "string", false, + func(args []string, res ArgMap) ([]string, error) { + res[name] = &ArgMapItem{Value: args[0]} + return args[1:], nil + }, + opts..., + ) +} + +// StringList registers a string list argument. +func (a *Args) StringList(name, help string, opts ...ArgOption) { + a.register(name, help, "string list", true, + func(args []string, res ArgMap) ([]string, error) { + res[name] = &ArgMapItem{Value: args} + return []string{}, nil + }, + opts..., + ) +} + +// Bool registers a bool argument. +func (a *Args) Bool(name, help string, opts ...ArgOption) { + a.register(name, help, "bool", false, + func(args []string, res ArgMap) ([]string, error) { + b, err := strconv.ParseBool(args[0]) + if err != nil { + return nil, fmt.Errorf("invalid bool value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: b} + return args[1:], nil + }, + opts..., + ) +} + +// BoolList registers a bool list argument. +func (a *Args) BoolList(name, help string, opts ...ArgOption) { + a.register(name, help, "bool list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + bs = make([]bool, len(args)) + ) + for i, a := range args { + bs[i], err = strconv.ParseBool(a) + if err != nil { + return nil, fmt.Errorf("invalid bool value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: bs} + return []string{}, nil + }, + opts..., + ) +} + +// Int registers an int argument. +func (a *Args) Int(name, help string, opts ...ArgOption) { + a.register(name, help, "int", false, + func(args []string, res ArgMap) ([]string, error) { + i, err := strconv.Atoi(args[0]) + if err != nil { + return nil, fmt.Errorf("invalid int value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: i} + return args[1:], nil + }, + opts..., + ) +} + +// IntList registers an int list argument. 
+func (a *Args) IntList(name, help string, opts ...ArgOption) { + a.register(name, help, "int list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + is = make([]int, len(args)) + ) + for i, a := range args { + is[i], err = strconv.Atoi(a) + if err != nil { + return nil, fmt.Errorf("invalid int value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: is} + return []string{}, nil + }, + opts..., + ) +} + +// Int64 registers an int64 argument. +func (a *Args) Int64(name, help string, opts ...ArgOption) { + a.register(name, help, "int64", false, + func(args []string, res ArgMap) ([]string, error) { + i, err := strconv.ParseInt(args[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid int64 value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: i} + return args[1:], nil + }, + opts..., + ) +} + +// Int64List registers an int64 list argument. +func (a *Args) Int64List(name, help string, opts ...ArgOption) { + a.register(name, help, "int64 list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + is = make([]int64, len(args)) + ) + for i, a := range args { + is[i], err = strconv.ParseInt(a, 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid int64 value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: is} + return []string{}, nil + }, + opts..., + ) +} + +// Uint registers an uint argument. +func (a *Args) Uint(name, help string, opts ...ArgOption) { + a.register(name, help, "uint", false, + func(args []string, res ArgMap) ([]string, error) { + u, err := strconv.ParseUint(args[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid uint value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: uint(u)} + return args[1:], nil + }, + opts..., + ) +} + +// UintList registers an uint list argument. 
+func (a *Args) UintList(name, help string, opts ...ArgOption) { + a.register(name, help, "uint list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + u uint64 + is = make([]uint, len(args)) + ) + for i, a := range args { + u, err = strconv.ParseUint(a, 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid uint value '%s' for argument: %s", a, name) + } + is[i] = uint(u) + } + + res[name] = &ArgMapItem{Value: is} + return []string{}, nil + }, + opts..., + ) +} + +// Uint64 registers an uint64 argument. +func (a *Args) Uint64(name, help string, opts ...ArgOption) { + a.register(name, help, "uint64", false, + func(args []string, res ArgMap) ([]string, error) { + u, err := strconv.ParseUint(args[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid uint64 value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: u} + return args[1:], nil + }, + opts..., + ) +} + +// Uint64List registers an uint64 list argument. +func (a *Args) Uint64List(name, help string, opts ...ArgOption) { + a.register(name, help, "uint64 list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + us = make([]uint64, len(args)) + ) + for i, a := range args { + us[i], err = strconv.ParseUint(a, 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid uint64 value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: us} + return []string{}, nil + }, + opts..., + ) +} + +// Float64 registers a float64 argument. +func (a *Args) Float64(name, help string, opts ...ArgOption) { + a.register(name, help, "float64", false, + func(args []string, res ArgMap) ([]string, error) { + f, err := strconv.ParseFloat(args[0], 64) + if err != nil { + return nil, fmt.Errorf("invalid float64 value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: f} + return args[1:], nil + }, + opts..., + ) +} + +// Float64List registers an float64 list argument. 
+func (a *Args) Float64List(name, help string, opts ...ArgOption) { + a.register(name, help, "float64 list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + fs = make([]float64, len(args)) + ) + for i, a := range args { + fs[i], err = strconv.ParseFloat(a, 64) + if err != nil { + return nil, fmt.Errorf("invalid float64 value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: fs} + return []string{}, nil + }, + opts..., + ) +} + +// Duration registers a duration argument. +func (a *Args) Duration(name, help string, opts ...ArgOption) { + a.register(name, help, "duration", false, + func(args []string, res ArgMap) ([]string, error) { + d, err := time.ParseDuration(args[0]) + if err != nil { + return nil, fmt.Errorf("invalid duration value '%s' for argument: %s", args[0], name) + } + + res[name] = &ArgMapItem{Value: d} + return args[1:], nil + }, + opts..., + ) +} + +// DurationList registers an duration list argument. +func (a *Args) DurationList(name, help string, opts ...ArgOption) { + a.register(name, help, "duration list", true, + func(args []string, res ArgMap) ([]string, error) { + var ( + err error + ds = make([]time.Duration, len(args)) + ) + for i, a := range args { + ds[i], err = time.ParseDuration(a) + if err != nil { + return nil, fmt.Errorf("invalid duration value '%s' for argument: %s", a, name) + } + } + + res[name] = &ArgMapItem{Value: ds} + return []string{}, nil + }, + opts..., + ) +} diff --git a/src/control/vendor/github.com/desertbit/grumble/command.go b/src/control/vendor/github.com/desertbit/grumble/command.go new file mode 100644 index 00000000000..b4d3de2dd62 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/command.go @@ -0,0 +1,119 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" +) + +// Command is just that, a command for your application. +type Command struct { + // Command name. + // This field is required. + Name string + + // Command name aliases. + Aliases []string + + // One liner help message for the command. + // This field is required. + Help string + + // More descriptive help message for the command. + LongHelp string + + // HelpGroup defines the help group headline. + // Note: this is only used for primary top-level commands. + HelpGroup string + + // Usage should define how to use the command. + // Sample: start [OPTIONS] CONTAINER [CONTAINER...] + Usage string + + // Define all command flags within this function. + Flags func(f *Flags) + + // Define all command arguments within this function. + Args func(a *Args) + + // Function to execute for the command. + Run func(c *Context) error + + // Completer is custom autocompleter for command. + // It takes in command arguments and returns autocomplete options. 
+ // By default all commands get autocomplete of subcommands. + // A non-nil Completer overrides the default behaviour. + Completer func(prefix string, args []string) []string + + parent *Command + flags Flags + args Args + commands Commands + isBuiltin bool // Whenever this is a build-in command not added by the user. +} + +func (c *Command) validate() error { + if len(c.Name) == 0 { + return fmt.Errorf("empty command name") + } else if c.Name[0] == '-' { + return fmt.Errorf("command name must not start with a '-'") + } else if len(c.Help) == 0 { + return fmt.Errorf("empty command help") + } + return nil +} + +func (c *Command) registerFlagsAndArgs(addHelpFlag bool) { + if addHelpFlag { + // Add default help command. + c.flags.Bool("h", "help", false, "display help") + } + + if c.Flags != nil { + c.Flags(&c.flags) + } + if c.Args != nil { + c.Args(&c.args) + } +} + +// Parent returns the parent command or nil. +func (c *Command) Parent() *Command { + return c.parent +} + +// AddCommand adds a new command. +// Panics on error. 
+func (c *Command) AddCommand(cmd *Command) { + err := cmd.validate() + if err != nil { + panic(err) + } + + cmd.parent = c + cmd.registerFlagsAndArgs(true) + + c.commands.Add(cmd) +} diff --git a/src/control/vendor/github.com/desertbit/grumble/commands.go b/src/control/vendor/github.com/desertbit/grumble/commands.go new file mode 100644 index 00000000000..9b1ea61220d --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/commands.go @@ -0,0 +1,203 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "sort" +) + +// Commands collection. +type Commands struct { + list []*Command + changed bool // Used to resort if something changes. +} + +// Add the command to the slice. +// Duplicates are ignored. 
+func (c *Commands) Add(cmd *Command) { + c.list = append(c.list, cmd) + c.changed = true +} + +// Remove a command from the slice. +func (c *Commands) Remove(name string) (found bool) { + for index, cmd := range c.list { + if cmd.Name == name { + found = true + c.changed = true + c.list = append(c.list[:index], c.list[index+1:]...) + return + } + } + return +} + +func (c *Commands) RemoveAll() { + var builtins []*Command + + // Hint: There are no built-in sub commands. Ignore them. + for _, cmd := range c.list { + if cmd.isBuiltin { + builtins = append(builtins, cmd) + } + } + + // Only keep the builtins. + c.list = builtins + c.changed = true +} + +// All returns a slice of all commands. +func (c *Commands) All() []*Command { + return c.list +} + +// Get the command by the name. Aliases are also checked. +// Returns nil if not found. +func (c *Commands) Get(name string) *Command { + for _, cmd := range c.list { + if cmd.Name == name { + return cmd + } + for _, a := range cmd.Aliases { + if a == name { + return cmd + } + } + } + return nil +} + +// FindCommand searches for the final command through all children. +// Returns a slice of non processed following command args. +// Returns cmd=nil if not found. +func (c *Commands) FindCommand(args []string) (cmd *Command, rest []string, err error) { + var cmds []*Command + cmds, _, rest, err = c.parse(args, nil, true) + if err != nil { + return + } + + if len(cmds) > 0 { + cmd = cmds[len(cmds)-1] + } + + return +} + +// Sort the commands by their name. +func (c *Commands) Sort() { + sort.Slice(c.list, func(i, j int) bool { + return c.list[i].Name < c.list[j].Name + }) +} + +// SortRecursive sorts the commands by their name including all sub commands. 
+func (c *Commands) SortRecursive() { + c.Sort() + for _, cmd := range c.list { + cmd.commands.SortRecursive() + } +} + +func (c *Commands) hasChanged() bool { + if c.changed { + return true + } + for _, sc := range c.list { + if sc.commands.hasChanged() { + return true + } + } + return false +} + +func (c *Commands) unsetChanged() { + c.changed = false + for _, sc := range c.list { + sc.commands.unsetChanged() + } +} + +// parse the args and return a command path to the root. +// cmds slice is empty, if no command was found. +func (c *Commands) parse( + args []string, + parentFlagMap FlagMap, + skipFlagMaps bool, +) ( + cmds []*Command, + flagsMap FlagMap, + rest []string, + err error, +) { + var fgs []FlagMap + cur := c + + for len(args) > 0 && cur != nil { + // Extract the command name from the arguments. + name := args[0] + + // Try to find the command. + cmd := cur.Get(name) + if cmd == nil { + break + } + + args = args[1:] + cmds = append(cmds, cmd) + cur = &cmd.commands + + // Parse the command flags. + fg := make(FlagMap) + args, err = cmd.flags.parse(args, fg) + if err != nil { + return + } + + if !skipFlagMaps { + fgs = append(fgs, fg) + } + } + + if !skipFlagMaps { + // Merge all the flag maps without default values. + flagsMap = make(FlagMap) + for i := len(fgs) - 1; i >= 0; i-- { + flagsMap.copyMissingValues(fgs[i], false) + } + flagsMap.copyMissingValues(parentFlagMap, false) + + // Now include default values. This will ensure, that default values have + // lower rank. 
+ for i := len(fgs) - 1; i >= 0; i-- { + flagsMap.copyMissingValues(fgs[i], true) + } + flagsMap.copyMissingValues(parentFlagMap, true) + } + + rest = args + return +} diff --git a/src/control/vendor/github.com/desertbit/grumble/completer.go b/src/control/vendor/github.com/desertbit/grumble/completer.go new file mode 100644 index 00000000000..453781b801a --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/completer.go @@ -0,0 +1,145 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package grumble + +import ( + "strings" + + shlex "github.com/desertbit/go-shlex" +) + +type completer struct { + commands *Commands +} + +func newCompleter(commands *Commands) *completer { + return &completer{ + commands: commands, + } +} + +func (c *completer) Do(line []rune, pos int) (newLine [][]rune, length int) { + // Discard anything after the cursor position. + // This is similar behaviour to shell/bash. + line = line[:pos] + + var words []string + if w, err := shlex.Split(string(line), true); err == nil { + words = w + } else { + words = strings.Fields(string(line)) // fallback + } + + prefix := "" + if len(words) > 0 && pos >= 1 && line[pos-1] != ' ' { + prefix = words[len(words)-1] + words = words[:len(words)-1] + } + + // Simple hack to allow auto completion for help. + if len(words) > 0 && words[0] == "help" { + words = words[1:] + } + + var ( + cmds *Commands + flags *Flags + suggestions [][]rune + ) + + // Find the last commands list. + if len(words) == 0 { + cmds = c.commands + } else { + cmd, rest, err := c.commands.FindCommand(words) + if err != nil || cmd == nil { + return + } + + // Call the custom completer if present. + if cmd.Completer != nil { + words = cmd.Completer(prefix, rest) + for _, w := range words { + suggestions = append(suggestions, []rune(strings.TrimPrefix(w, prefix))) + } + return suggestions, len(prefix) + } + + // No rest must be there. 
+ if len(rest) != 0 { + return + } + + cmds = &cmd.commands + flags = &cmd.flags + } + + if len(prefix) > 0 { + for _, cmd := range cmds.list { + if strings.HasPrefix(cmd.Name, prefix) { + suggestions = append(suggestions, []rune(strings.TrimPrefix(cmd.Name, prefix))) + } + for _, a := range cmd.Aliases { + if strings.HasPrefix(a, prefix) { + suggestions = append(suggestions, []rune(strings.TrimPrefix(a, prefix))) + } + } + } + + if flags != nil { + for _, f := range flags.list { + if len(f.Short) > 0 { + short := "-" + f.Short + if len(prefix) < len(short) && strings.HasPrefix(short, prefix) { + suggestions = append(suggestions, []rune(strings.TrimPrefix(short, prefix))) + } + } + long := "--" + f.Long + if len(prefix) < len(long) && strings.HasPrefix(long, prefix) { + suggestions = append(suggestions, []rune(strings.TrimPrefix(long, prefix))) + } + } + } + } else { + for _, cmd := range cmds.list { + suggestions = append(suggestions, []rune(cmd.Name)) + } + if flags != nil { + for _, f := range flags.list { + suggestions = append(suggestions, []rune("--"+f.Long)) + if len(f.Short) > 0 { + suggestions = append(suggestions, []rune("-"+f.Short)) + } + } + } + } + + // Append an empty space to each suggestions. 
+ for i, s := range suggestions { + suggestions[i] = append(s, ' ') + } + + return suggestions, len(prefix) +} diff --git a/src/control/vendor/github.com/desertbit/grumble/config.go b/src/control/vendor/github.com/desertbit/grumble/config.go new file mode 100644 index 00000000000..e8ac5b7e3ff --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/config.go @@ -0,0 +1,120 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" + + "github.com/fatih/color" +) + +const ( + defaultMultiPrompt = "... " +) + +// Config specifies the application options. +type Config struct { + // Name specifies the application name. This field is required. + Name string + + // Description specifies the application description. + Description string + + // Define all app command flags within this function. 
+ Flags func(f *Flags) + + // Persist readline historys to file if specified. + HistoryFile string + + // Specify the max length of historys, it's 500 by default, set it to -1 to disable history. + HistoryLimit int + + // NoColor defines if color output should be disabled. + NoColor bool + + // Prompt defines the shell prompt. + Prompt string + PromptColor *color.Color + + // MultiPrompt defines the prompt shown on multi readline. + MultiPrompt string + MultiPromptColor *color.Color + + // Some more optional color settings. + ASCIILogoColor *color.Color + ErrorColor *color.Color + + // Help styling. + HelpHeadlineUnderline bool + HelpSubCommands bool + HelpHeadlineColor *color.Color +} + +// SetDefaults sets the default values if not set. +func (c *Config) SetDefaults() { + if c.HistoryLimit == 0 { + c.HistoryLimit = 500 + } + if c.PromptColor == nil { + c.PromptColor = color.New(color.FgYellow, color.Bold) + } + if len(c.Prompt) == 0 { + c.Prompt = c.Name + " » " + } + if c.MultiPromptColor == nil { + c.MultiPromptColor = c.PromptColor + } + if len(c.MultiPrompt) == 0 { + c.MultiPrompt = defaultMultiPrompt + } + if c.ASCIILogoColor == nil { + c.ASCIILogoColor = c.PromptColor + } + if c.ErrorColor == nil { + c.ErrorColor = color.New(color.FgRed, color.Bold) + } +} + +// Validate the required config fields. 
+func (c *Config) Validate() error { + if len(c.Name) == 0 { + return fmt.Errorf("application name is not set") + } + return nil +} + +func (c *Config) prompt() string { + if c.NoColor { + return c.Prompt + } + return c.PromptColor.Sprint(c.Prompt) +} + +func (c *Config) multiPrompt() string { + if c.NoColor { + return c.MultiPrompt + } + return c.MultiPromptColor.Sprint(c.MultiPrompt) +} diff --git a/src/control/vendor/github.com/desertbit/grumble/context.go b/src/control/vendor/github.com/desertbit/grumble/context.go new file mode 100644 index 00000000000..b3990c705b0 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/context.go @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +// Context defines a command context. +type Context struct { + // Reference to the app. 
+ App *App + + // Flags contains all command line flags. + Flags FlagMap + + // Args contains all command line arguments. + Args ArgMap + + // Cmd is the currently executing command. + Command *Command +} + +func newContext(a *App, cmd *Command, flags FlagMap, args ArgMap) *Context { + return &Context{ + App: a, + Command: cmd, + Flags: flags, + Args: args, + } +} + +// Stop signalizes the app to exit. +func (c *Context) Stop() { + _ = c.App.Close() +} diff --git a/src/control/vendor/github.com/desertbit/grumble/flagmap.go b/src/control/vendor/github.com/desertbit/grumble/flagmap.go new file mode 100644 index 00000000000..66a0c29d40a --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/flagmap.go @@ -0,0 +1,163 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package grumble + +import ( + "fmt" + "time" +) + +// FlagMapItem holds the specific flag data. +type FlagMapItem struct { + Value interface{} + IsDefault bool +} + +// FlagMap holds all the parsed flag values. +type FlagMap map[string]*FlagMapItem + +// copyMissingValues adds all missing values to the flags map. +func (f FlagMap) copyMissingValues(m FlagMap, copyDefault bool) { + for k, v := range m { + if _, ok := f[k]; !ok { + if !copyDefault && v.IsDefault { + continue + } + f[k] = v + } + } +} + +// String returns the given flag value as string. +// Panics if not present. Flags must be registered. +func (f FlagMap) String(long string) string { + i := f[long] + if i == nil { + panic(fmt.Errorf("missing flag value: flag '%s' not registered", long)) + } + s, ok := i.Value.(string) + if !ok { + panic(fmt.Errorf("failed to assert flag '%s' to string", long)) + } + return s +} + +// Bool returns the given flag value as boolean. +// Panics if not present. Flags must be registered. +func (f FlagMap) Bool(long string) bool { + i := f[long] + if i == nil { + panic(fmt.Errorf("missing flag value: flag '%s' not registered", long)) + } + b, ok := i.Value.(bool) + if !ok { + panic(fmt.Errorf("failed to assert flag '%s' to bool", long)) + } + return b +} + +// Int returns the given flag value as int. +// Panics if not present. Flags must be registered. +func (f FlagMap) Int(long string) int { + i := f[long] + if i == nil { + panic(fmt.Errorf("missing flag value: flag '%s' not registered", long)) + } + v, ok := i.Value.(int) + if !ok { + panic(fmt.Errorf("failed to assert flag '%s' to int", long)) + } + return v +} + +// Int64 returns the given flag value as int64. +// Panics if not present. Flags must be registered. 
+func (f FlagMap) Int64(long string) int64 {
+	// A missing (or nil) entry is reported as an unregistered flag.
+	item := f[long]
+	if item == nil {
+		panic(fmt.Errorf("missing flag value: flag '%s' not registered", long))
+	}
+
+	// The stored value must have the dynamic type int64.
+	if n, isInt64 := item.Value.(int64); isInt64 {
+		return n
+	}
+	panic(fmt.Errorf("failed to assert flag '%s' to int64", long))
+}
+
+// Uint looks up the flag registered under the given long name and returns
+// its value as a uint.
+// It panics with an error if the map holds no entry for the flag or if the
+// stored value is not a uint.
+func (f FlagMap) Uint(long string) uint {
+	item := f[long]
+	if item == nil {
+		panic(fmt.Errorf("missing flag value: flag '%s' not registered", long))
+	}
+
+	if n, isUint := item.Value.(uint); isUint {
+		return n
+	}
+	panic(fmt.Errorf("failed to assert flag '%s' to uint", long))
+}
+
+// Uint64 looks up the flag registered under the given long name and returns
+// its value as a uint64.
+// It panics with an error if the map holds no entry for the flag or if the
+// stored value is not a uint64.
+func (f FlagMap) Uint64(long string) uint64 {
+	item := f[long]
+	if item == nil {
+		panic(fmt.Errorf("missing flag value: flag '%s' not registered", long))
+	}
+
+	if n, isUint64 := item.Value.(uint64); isUint64 {
+		return n
+	}
+	panic(fmt.Errorf("failed to assert flag '%s' to uint64", long))
+}
+
+// Float64 looks up the flag registered under the given long name and returns
+// its value as a float64.
+// It panics with an error if the map holds no entry for the flag or if the
+// stored value is not a float64.
+func (f FlagMap) Float64(long string) float64 {
+	item := f[long]
+	if item == nil {
+		panic(fmt.Errorf("missing flag value: flag '%s' not registered", long))
+	}
+
+	if x, isFloat64 := item.Value.(float64); isFloat64 {
+		return x
+	}
+	panic(fmt.Errorf("failed to assert flag '%s' to float64", long))
+}
+
+// Duration returns the given flag value as duration.
+// Panics if not present. Flags must be registered.
+func (f FlagMap) Duration(long string) time.Duration { + i := f[long] + if i == nil { + panic(fmt.Errorf("missing flag value: flag '%s' not registered", long)) + } + v, ok := i.Value.(time.Duration) + if !ok { + panic(fmt.Errorf("failed to assert flag '%s' to duration", long)) + } + return v +} diff --git a/src/control/vendor/github.com/desertbit/grumble/flags.go b/src/control/vendor/github.com/desertbit/grumble/flags.go new file mode 100644 index 00000000000..5ef98a88ef7 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/flags.go @@ -0,0 +1,488 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package grumble + +import ( + "fmt" + "sort" + "strconv" + "strings" + "time" +) + +type parseFlagFunc func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) +type defaultFlagFunc func(res FlagMap) + +type flagItem struct { + Short string + Long string + Help string + HelpArgs string + HelpShowDefault bool + Default interface{} +} + +// Flags holds all the registered flags. +type Flags struct { + parsers []parseFlagFunc + defaults map[string]defaultFlagFunc + list []*flagItem +} + +// empty returns true, if the flags are empty. +func (f *Flags) empty() bool { + return len(f.list) == 0 +} + +// sort the flags by their name. +func (f *Flags) sort() { + sort.Slice(f.list, func(i, j int) bool { + return f.list[i].Long < f.list[j].Long + }) +} + +func (f *Flags) register( + short, long, help, helpArgs string, + helpShowDefault bool, + defaultValue interface{}, + df defaultFlagFunc, + pf parseFlagFunc, +) { + // Validate. + if len(short) > 1 { + panic(fmt.Errorf("invalid short flag: '%s': must be a single character", short)) + } else if strings.HasPrefix(short, "-") { + panic(fmt.Errorf("invalid short flag: '%s': must not start with a '-'", short)) + } else if len(long) == 0 { + panic(fmt.Errorf("empty long flag: short='%s'", short)) + } else if strings.HasPrefix(long, "-") { + panic(fmt.Errorf("invalid long flag: '%s': must not start with a '-'", long)) + } else if len(help) == 0 { + panic(fmt.Errorf("empty flag help message for flag: '%s'", long)) + } + + // Check, that both short and long are unique. + // Short flags are empty if not set. 
+ for _, fi := range f.list { + if fi.Short != "" && short != "" && fi.Short == short { + panic(fmt.Errorf("flag shortcut '%s' registered twice", short)) + } + if fi.Long == long { + panic(fmt.Errorf("flag '%s' registered twice", long)) + } + } + + f.list = append(f.list, &flagItem{ + Short: short, + Long: long, + Help: help, + HelpShowDefault: helpShowDefault, + HelpArgs: helpArgs, + Default: defaultValue, + }) + + if f.defaults == nil { + f.defaults = make(map[string]defaultFlagFunc) + } + f.defaults[long] = df + + f.parsers = append(f.parsers, pf) +} + +func (f *Flags) match(flag, short, long string) bool { + return (len(short) > 0 && flag == "-"+short) || + (len(long) > 0 && flag == "--"+long) +} + +func (f *Flags) parse(args []string, res FlagMap) ([]string, error) { + var err error + var parsed bool + + // Parse all leading flags. +Loop: + for len(args) > 0 { + a := args[0] + if !strings.HasPrefix(a, "-") { + break Loop + } + args = args[1:] + + // A double dash (--) is used to signify the end of command options, + // after which only positional arguments are accepted. + if a == "--" { + break Loop + } + + pos := strings.Index(a, "=") + equalVal := "" + if pos > 0 { + equalVal = a[pos+1:] + a = a[:pos] + } + + for _, p := range f.parsers { + args, parsed, err = p(a, equalVal, args, res) + if err != nil { + return nil, err + } else if parsed { + continue Loop + } + } + return nil, fmt.Errorf("invalid flag: %s", a) + } + + // Finally set all the default values for not passed flags. + if f.defaults == nil { + return args, nil + } + + for _, i := range f.list { + if _, ok := res[i.Long]; ok { + continue + } + df, ok := f.defaults[i.Long] + if !ok { + return nil, fmt.Errorf("invalid flag: missing default function: %s", i.Long) + } + df(res) + } + + return args, nil +} + +// StringL same as String, but without a shorthand. +func (f *Flags) StringL(long, defaultValue, help string) { + f.String("", long, defaultValue, help) +} + +// String registers a string flag. 
+func (f *Flags) String(short, long, defaultValue, help string) { + f.register(short, long, help, "string", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + if len(equalVal) > 0 { + res[long] = &FlagMapItem{ + Value: trimQuotes(equalVal), + IsDefault: false, + } + return args, true, nil + } + if len(args) == 0 { + return args, false, fmt.Errorf("missing string value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: args[0], + IsDefault: false, + } + args = args[1:] + return args, true, nil + }) +} + +// BoolL same as Bool, but without a shorthand. +func (f *Flags) BoolL(long string, defaultValue bool, help string) { + f.Bool("", long, defaultValue, help) +} + +// Bool registers a boolean flag. +func (f *Flags) Bool(short, long string, defaultValue bool, help string) { + f.register(short, long, help, "", false, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + if len(equalVal) > 0 { + b, err := strconv.ParseBool(equalVal) + if err != nil { + return args, false, fmt.Errorf("invalid boolean value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: b, + IsDefault: false, + } + return args, true, nil + } + res[long] = &FlagMapItem{ + Value: true, + IsDefault: false, + } + return args, true, nil + }) +} + +// IntL same as Int, but without a shorthand. +func (f *Flags) IntL(long string, defaultValue int, help string) { + f.Int("", long, defaultValue, help) +} + +// Int registers an int flag. 
+func (f *Flags) Int(short, long string, defaultValue int, help string) { + f.register(short, long, help, "int", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing int value for flag: %s", flag) + } + i, err := strconv.Atoi(vStr) + if err != nil { + return args, false, fmt.Errorf("invalid int value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: i, + IsDefault: false, + } + return args, true, nil + }) +} + +// Int64L same as Int64, but without a shorthand. +func (f *Flags) Int64L(long string, defaultValue int64, help string) { + f.Int64("", long, defaultValue, help) +} + +// Int64 registers an int64 flag. +func (f *Flags) Int64(short, long string, defaultValue int64, help string) { + f.register(short, long, help, "int", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing int value for flag: %s", flag) + } + i, err := strconv.ParseInt(vStr, 10, 64) + if err != nil { + return args, false, fmt.Errorf("invalid int value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: i, + IsDefault: false, + } + return args, true, nil + }) +} + +// UintL same as Uint, but without a shorthand. 
+func (f *Flags) UintL(long string, defaultValue uint, help string) { + f.Uint("", long, defaultValue, help) +} + +// Uint registers an uint flag. +func (f *Flags) Uint(short, long string, defaultValue uint, help string) { + f.register(short, long, help, "uint", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing uint value for flag: %s", flag) + } + i, err := strconv.ParseUint(vStr, 10, 64) + if err != nil { + return args, false, fmt.Errorf("invalid uint value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: uint(i), + IsDefault: false, + } + return args, true, nil + }) +} + +// Uint64L same as Uint64, but without a shorthand. +func (f *Flags) Uint64L(long string, defaultValue uint64, help string) { + f.Uint64("", long, defaultValue, help) +} + +// Uint64 registers an uint64 flag. 
+func (f *Flags) Uint64(short, long string, defaultValue uint64, help string) { + f.register(short, long, help, "uint", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing uint value for flag: %s", flag) + } + i, err := strconv.ParseUint(vStr, 10, 64) + if err != nil { + return args, false, fmt.Errorf("invalid uint value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: i, + IsDefault: false, + } + return args, true, nil + }) +} + +// Float64L same as Float64, but without a shorthand. +func (f *Flags) Float64L(long string, defaultValue float64, help string) { + f.Float64("", long, defaultValue, help) +} + +// Float64 registers an float64 flag. +func (f *Flags) Float64(short, long string, defaultValue float64, help string) { + f.register(short, long, help, "float", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing float value for flag: %s", flag) + } + i, err := strconv.ParseFloat(vStr, 64) + if err != nil { + return args, false, fmt.Errorf("invalid float value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: i, + IsDefault: false, + } + return args, true, nil + }) +} + +// DurationL same as Duration, but without a shorthand. 
+func (f *Flags) DurationL(long string, defaultValue time.Duration, help string) { + f.Duration("", long, defaultValue, help) +} + +// Duration registers a duration flag. +func (f *Flags) Duration(short, long string, defaultValue time.Duration, help string) { + f.register(short, long, help, "duration", true, defaultValue, + func(res FlagMap) { + res[long] = &FlagMapItem{ + Value: defaultValue, + IsDefault: true, + } + }, + func(flag, equalVal string, args []string, res FlagMap) ([]string, bool, error) { + if !f.match(flag, short, long) { + return args, false, nil + } + var vStr string + if len(equalVal) > 0 { + vStr = equalVal + } else if len(args) > 0 { + vStr = args[0] + args = args[1:] + } else { + return args, false, fmt.Errorf("missing duration value for flag: %s", flag) + } + d, err := time.ParseDuration(vStr) + if err != nil { + return args, false, fmt.Errorf("invalid duration value for flag: %s", flag) + } + res[long] = &FlagMapItem{ + Value: d, + IsDefault: false, + } + return args, true, nil + }) +} + +func trimQuotes(s string) string { + if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' { + return s[1 : len(s)-1] + } + return s +} diff --git a/src/control/vendor/github.com/desertbit/grumble/functions.go b/src/control/vendor/github.com/desertbit/grumble/functions.go new file mode 100644 index 00000000000..36410096a08 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/functions.go @@ -0,0 +1,320 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package grumble + +import ( + "fmt" + "os" + "sort" + + "github.com/desertbit/columnize" +) + +func defaultInterruptHandler(a *App, count int) { + if count >= 2 { + a.Println("interrupted") + os.Exit(1) + } + a.Println("input Ctrl-c once more to exit") +} + +func defaultPrintHelp(a *App, shell bool) { + // Columnize options. + config := columnize.DefaultConfig() + config.Delim = "|" + config.Glue = " " + config.Prefix = " " + + // ASCII logo. + if a.printASCIILogo != nil { + a.printASCIILogo(a) + } + + // Description. + if (len(a.config.Description)) > 0 { + a.Printf("\n%s\n", a.config.Description) + } + + // Usage. + if !shell { + a.Println() + printHeadline(a, "Usage:") + a.Printf(" %s [command]\n", a.config.Name) + } + + // Group the commands by their help group if present. + groups := make(map[string]*Commands) + for _, c := range a.commands.list { + key := c.HelpGroup + if len(key) == 0 { + key = "Commands:" + } + cc := groups[key] + if cc == nil { + cc = new(Commands) + groups[key] = cc + } + cc.Add(c) + } + + // Sort the map by the keys. + var keys []string + for k := range groups { + keys = append(keys, k) + } + sort.Strings(keys) + + // Print each commands group. 
+ for _, headline := range keys { + cc := groups[headline] + cc.Sort() + + var output []string + for _, c := range cc.list { + name := c.Name + for _, a := range c.Aliases { + name += ", " + a + } + output = append(output, fmt.Sprintf("%s | %v", name, c.Help)) + } + + if len(output) > 0 { + a.Println() + printHeadline(a, headline) + a.Printf("%s\n", columnize.Format(output, config)) + } + } + + // Sub Commands. + if a.config.HelpSubCommands { + // Check if there is at least one sub command. + hasSubCmds := false + for _, c := range a.commands.list { + if len(c.commands.list) > 0 { + hasSubCmds = true + break + } + } + if hasSubCmds { + // Headline. + a.Println() + printHeadline(a, "Sub Commands:") + hp := headlinePrinter(a) + + // Only print the first level of sub commands. + for _, c := range a.commands.list { + if len(c.commands.list) == 0 { + continue + } + + var output []string + for _, c := range c.commands.list { + name := c.Name + for _, a := range c.Aliases { + name += ", " + a + } + output = append(output, fmt.Sprintf("%s | %v", name, c.Help)) + } + + a.Println() + _, _ = hp(c.Name + ":") + a.Printf("%s\n", columnize.Format(output, config)) + } + } + } + + // Flags. + if !shell { + printFlags(a, &a.flags) + } + + a.Println() +} + +func defaultPrintCommandHelp(a *App, cmd *Command, shell bool) { + // Columnize options. + config := columnize.DefaultConfig() + config.Delim = "|" + config.Glue = " " + config.Prefix = " " + + // Help description. + if len(cmd.LongHelp) > 0 { + a.Printf("\n%s\n", cmd.LongHelp) + } else { + a.Printf("\n%s\n", cmd.Help) + } + + // Usage. + printUsage(a, cmd) + + // Arguments. + printArgs(a, &cmd.args) + + // Flags. + printFlags(a, &cmd.flags) + + // Sub Commands. + if len(cmd.commands.list) > 0 { + // Only print the first level of sub commands. 
+ var output []string + for _, c := range cmd.commands.list { + name := c.Name + for _, a := range c.Aliases { + name += ", " + a + } + output = append(output, fmt.Sprintf("%s | %v", name, c.Help)) + } + + a.Println() + printHeadline(a, "Sub Commands:") + a.Printf("%s\n", columnize.Format(output, config)) + } + + a.Println() +} + +func headlinePrinter(a *App) func(v ...interface{}) (int, error) { + if a.config.NoColor || a.config.HelpHeadlineColor == nil { + return a.Println + } + return func(v ...interface{}) (int, error) { + return a.config.HelpHeadlineColor.Fprintln(a, v...) + } +} + +func printHeadline(a *App, s string) { + hp := headlinePrinter(a) + if a.config.HelpHeadlineUnderline { + _, _ = hp(s) + u := "" + for i := 0; i < len(s); i++ { + u += "=" + } + _, _ = hp(u) + } else { + _, _ = hp(s) + } +} + +func printUsage(a *App, cmd *Command) { + a.Println() + printHeadline(a, "Usage:") + + // Print either the user-provided usage message or compose + // one on our own from the flags and args. + if len(cmd.Usage) > 0 { + a.Printf(" %s\n", cmd.Usage) + return + } + + // Layout: Cmd [Flags] Args + a.Printf(" %s", cmd.Name) + if !cmd.flags.empty() { + a.Printf(" [flags]") + } + if !cmd.args.empty() { + for _, arg := range cmd.args.list { + name := arg.Name + if arg.isList { + name += "..." + } + + if arg.optional { + a.Printf(" [%s]", name) + } else { + a.Printf(" %s", name) + } + + if arg.isList && (arg.listMin != -1 || arg.listMax != -1) { + a.Printf("{") + if arg.listMin != -1 { + a.Printf("%d", arg.listMin) + } + a.Printf(",") + if arg.listMax != -1 { + a.Printf("%d", arg.listMax) + } + a.Printf("}") + } + } + } + a.Println() +} + +func printArgs(a *App, args *Args) { + // Columnize options. 
+ config := columnize.DefaultConfig() + config.Delim = "|" + config.Glue = " " + config.Prefix = " " + + var output []string + for _, a := range args.list { + defaultValue := "" + if a.Default != nil && len(fmt.Sprintf("%v", a.Default)) > 0 && a.optional { + defaultValue = fmt.Sprintf("(default: %v)", a.Default) + } + output = append(output, fmt.Sprintf("%s || %s |||| %s %s", a.Name, a.HelpArgs, a.Help, defaultValue)) + } + + if len(output) > 0 { + a.Println() + printHeadline(a, "Args:") + a.Printf("%s\n", columnize.Format(output, config)) + } +} + +func printFlags(a *App, flags *Flags) { + // Columnize options. + config := columnize.DefaultConfig() + config.Delim = "|" + config.Glue = " " + config.Prefix = " " + + flags.sort() + + var output []string + for _, f := range flags.list { + long := "--" + f.Long + short := "" + if len(f.Short) > 0 { + short = "-" + f.Short + "," + } + + defaultValue := "" + if f.Default != nil && f.HelpShowDefault && len(fmt.Sprintf("%v", f.Default)) > 0 { + defaultValue = fmt.Sprintf("(default: %v)", f.Default) + } + + output = append(output, fmt.Sprintf("%s | %s | %s |||| %s %s", short, long, f.HelpArgs, f.Help, defaultValue)) + } + + if len(output) > 0 { + a.Println() + printHeadline(a, "Flags:") + a.Printf("%s\n", columnize.Format(output, config)) + } +} diff --git a/src/control/vendor/github.com/desertbit/grumble/grumble.go b/src/control/vendor/github.com/desertbit/grumble/grumble.go new file mode 100644 index 00000000000..d49312ba645 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/grumble/grumble.go @@ -0,0 +1,41 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2018 Roland Singer [roland.singer@deserbit.com] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Package grumble is a powerful modern CLI and SHELL. +package grumble + +import ( + "fmt" + "os" +) + +// Main is a shorthand to run the app within the main function. +// This function will handle the error and exit the application on error. +func Main(a *App) { + err := a.Run() + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } +} diff --git a/src/control/vendor/github.com/desertbit/readline/.gitignore b/src/control/vendor/github.com/desertbit/readline/.gitignore new file mode 100644 index 00000000000..a3062beae38 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/.gitignore @@ -0,0 +1 @@ +.vscode/* diff --git a/src/control/vendor/github.com/desertbit/readline/.travis.yml b/src/control/vendor/github.com/desertbit/readline/.travis.yml new file mode 100644 index 00000000000..9c359554320 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/.travis.yml @@ -0,0 +1,8 @@ +language: go +go: + - 1.x +script: + - GOOS=windows go install github.com/chzyer/readline/example/... + - GOOS=linux go install github.com/chzyer/readline/example/... + - GOOS=darwin go install github.com/chzyer/readline/example/... 
+ - go test -race -v diff --git a/src/control/vendor/github.com/desertbit/readline/CHANGELOG.md b/src/control/vendor/github.com/desertbit/readline/CHANGELOG.md new file mode 100644 index 00000000000..14ff5be1313 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/CHANGELOG.md @@ -0,0 +1,58 @@ +# ChangeLog + +### 1.4 - 2016-07-25 + +* [#60][60] Support dynamic autocompletion +* Fix ANSI parser on Windows +* Fix wrong column width in complete mode on Windows +* Remove dependent package "golang.org/x/crypto/ssh/terminal" + +### 1.3 - 2016-05-09 + +* [#38][38] add SetChildren for prefix completer interface +* [#42][42] improve multiple lines compatibility +* [#43][43] remove sub-package(runes) for gopkg compatibility +* [#46][46] Auto complete with space prefixed line +* [#48][48] support suspend process (ctrl+Z) +* [#49][49] fix bug that check equals with previous command +* [#53][53] Fix bug which causes integer divide by zero panicking when input buffer is empty + +### 1.2 - 2016-03-05 + +* Add a demo for checking password strength [example/readline-pass-strength](https://github.com/chzyer/readline/blob/master/example/readline-pass-strength/readline-pass-strength.go), , written by [@sahib](https://github.com/sahib) +* [#23][23], support stdin remapping +* [#27][27], add a `UniqueEditLine` to `Config`, which will erase the editing line after user submited it, usually use in IM. +* Add a demo for multiline [example/readline-multiline](https://github.com/chzyer/readline/blob/master/example/readline-multiline/readline-multiline.go) which can submit one SQL by multiple lines. +* Supports performs even stdin/stdout is not a tty. +* Add a new simple apis for single instance, check by [here](https://github.com/chzyer/readline/blob/master/std.go). It need to save history manually if using this api. +* [#28][28], fixes the history is not working as expected. 
+* [#33][33], vim mode now support `c`, `d`, `x (delete character)`, `r (replace character)` + +### 1.1 - 2015-11-20 + +* [#12][12] Add support for key ``/``/`` +* Only enter raw mode as needed (calling `Readline()`), program will receive signal(e.g. Ctrl+C) if not interact with `readline`. +* Bugs fixed for `PrefixCompleter` +* Press `Ctrl+D` in empty line will cause `io.EOF` in error, Press `Ctrl+C` in anytime will cause `ErrInterrupt` instead of `io.EOF`, this will privodes a shell-like user experience. +* Customable Interrupt/EOF prompt in `Config` +* [#17][17] Change atomic package to use 32bit function to let it runnable on arm 32bit devices +* Provides a new password user experience(`readline.ReadPasswordEx()`). + +### 1.0 - 2015-10-14 + +* Initial public release. + +[12]: https://github.com/chzyer/readline/pull/12 +[17]: https://github.com/chzyer/readline/pull/17 +[23]: https://github.com/chzyer/readline/pull/23 +[27]: https://github.com/chzyer/readline/pull/27 +[28]: https://github.com/chzyer/readline/pull/28 +[33]: https://github.com/chzyer/readline/pull/33 +[38]: https://github.com/chzyer/readline/pull/38 +[42]: https://github.com/chzyer/readline/pull/42 +[43]: https://github.com/chzyer/readline/pull/43 +[46]: https://github.com/chzyer/readline/pull/46 +[48]: https://github.com/chzyer/readline/pull/48 +[49]: https://github.com/chzyer/readline/pull/49 +[53]: https://github.com/chzyer/readline/pull/53 +[60]: https://github.com/chzyer/readline/pull/60 diff --git a/src/control/vendor/github.com/desertbit/readline/LICENSE b/src/control/vendor/github.com/desertbit/readline/LICENSE new file mode 100644 index 00000000000..c9afab3dcd0 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Chzyer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/src/control/vendor/github.com/desertbit/readline/README.md b/src/control/vendor/github.com/desertbit/readline/README.md new file mode 100644 index 00000000000..fab974b7f34 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/README.md @@ -0,0 +1,114 @@ +[![Build Status](https://travis-ci.org/chzyer/readline.svg?branch=master)](https://travis-ci.org/chzyer/readline) +[![Software License](https://img.shields.io/badge/license-MIT-brightgreen.svg)](LICENSE.md) +[![Version](https://img.shields.io/github/tag/chzyer/readline.svg)](https://github.com/chzyer/readline/releases) +[![GoDoc](https://godoc.org/github.com/chzyer/readline?status.svg)](https://godoc.org/github.com/chzyer/readline) +[![OpenCollective](https://opencollective.com/readline/badge/backers.svg)](#backers) +[![OpenCollective](https://opencollective.com/readline/badge/sponsors.svg)](#sponsors) + +

+ + + +

+ +A powerful readline library in `Linux` `macOS` `Windows` `Solaris` + +## Guide + +* [Demo](example/readline-demo/readline-demo.go) +* [Shortcut](doc/shortcut.md) + +## Repos using readline + +[![cockroachdb](https://img.shields.io/github/stars/cockroachdb/cockroach.svg?label=cockroachdb/cockroach)](https://github.com/cockroachdb/cockroach) +[![robertkrimen/otto](https://img.shields.io/github/stars/robertkrimen/otto.svg?label=robertkrimen/otto)](https://github.com/robertkrimen/otto) +[![empire](https://img.shields.io/github/stars/remind101/empire.svg?label=remind101/empire)](https://github.com/remind101/empire) +[![mehrdadrad/mylg](https://img.shields.io/github/stars/mehrdadrad/mylg.svg?label=mehrdadrad/mylg)](https://github.com/mehrdadrad/mylg) +[![knq/usql](https://img.shields.io/github/stars/knq/usql.svg?label=knq/usql)](https://github.com/knq/usql) +[![youtube/doorman](https://img.shields.io/github/stars/youtube/doorman.svg?label=youtube/doorman)](https://github.com/youtube/doorman) +[![bom-d-van/harp](https://img.shields.io/github/stars/bom-d-van/harp.svg?label=bom-d-van/harp)](https://github.com/bom-d-van/harp) +[![abiosoft/ishell](https://img.shields.io/github/stars/abiosoft/ishell.svg?label=abiosoft/ishell)](https://github.com/abiosoft/ishell) +[![Netflix/hal-9001](https://img.shields.io/github/stars/Netflix/hal-9001.svg?label=Netflix/hal-9001)](https://github.com/Netflix/hal-9001) +[![docker/go-p9p](https://img.shields.io/github/stars/docker/go-p9p.svg?label=docker/go-p9p)](https://github.com/docker/go-p9p) + + +## Feedback + +If you have any questions, please submit a github issue and any pull requests is welcomed :) + +* [https://twitter.com/chzyer](https://twitter.com/chzyer) +* [http://weibo.com/2145262190](http://weibo.com/2145262190) + + +## Backers + +Love Readline? Help me keep it alive by donating funds to cover project expenses!
+[[Become a backer](https://opencollective.com/readline#backer)] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Sponsors + +Become a sponsor and get your logo here on our Github page. [[Become a sponsor](https://opencollective.com/readline#sponsor)] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/control/vendor/github.com/desertbit/readline/ansi_windows.go b/src/control/vendor/github.com/desertbit/readline/ansi_windows.go new file mode 100644 index 00000000000..63b908c187a --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/ansi_windows.go @@ -0,0 +1,249 @@ +// +build windows + +package readline + +import ( + "bufio" + "io" + "strconv" + "strings" + "sync" + "unicode/utf8" + "unsafe" +) + +const ( + _ = uint16(0) + COLOR_FBLUE = 0x0001 + COLOR_FGREEN = 0x0002 + COLOR_FRED = 0x0004 + COLOR_FINTENSITY = 0x0008 + + COLOR_BBLUE = 0x0010 + COLOR_BGREEN = 0x0020 + COLOR_BRED = 0x0040 + COLOR_BINTENSITY = 0x0080 + + COMMON_LVB_UNDERSCORE = 0x8000 + COMMON_LVB_BOLD = 0x0007 +) + +var ColorTableFg = []word{ + 0, // 30: Black + COLOR_FRED, // 31: Red + COLOR_FGREEN, // 32: Green + COLOR_FRED | COLOR_FGREEN, // 33: Yellow + COLOR_FBLUE, // 34: Blue + COLOR_FRED | COLOR_FBLUE, // 35: Magenta + COLOR_FGREEN | COLOR_FBLUE, // 36: Cyan + COLOR_FRED | COLOR_FBLUE | COLOR_FGREEN, // 37: White +} + +var ColorTableBg = []word{ + 0, // 40: Black + COLOR_BRED, // 41: Red + COLOR_BGREEN, // 42: Green + COLOR_BRED | COLOR_BGREEN, // 43: Yellow + COLOR_BBLUE, // 44: Blue + COLOR_BRED | COLOR_BBLUE, // 45: Magenta + COLOR_BGREEN | COLOR_BBLUE, // 46: Cyan + COLOR_BRED | COLOR_BBLUE | COLOR_BGREEN, // 47: White +} + +type ANSIWriter struct { + target io.Writer + wg sync.WaitGroup + ctx *ANSIWriterCtx + sync.Mutex +} + +func NewANSIWriter(w io.Writer) *ANSIWriter { + a := &ANSIWriter{ + target: w, + ctx: NewANSIWriterCtx(w), + } + return a +} + +func (a *ANSIWriter) Close() error { + a.wg.Wait() + return nil +} + +type 
ANSIWriterCtx struct { + isEsc bool + isEscSeq bool + arg []string + target *bufio.Writer + wantFlush bool +} + +func NewANSIWriterCtx(target io.Writer) *ANSIWriterCtx { + return &ANSIWriterCtx{ + target: bufio.NewWriter(target), + } +} + +func (a *ANSIWriterCtx) Flush() { + a.target.Flush() +} + +func (a *ANSIWriterCtx) process(r rune) bool { + if a.wantFlush { + if r == 0 || r == CharEsc { + a.wantFlush = false + a.target.Flush() + } + } + if a.isEscSeq { + a.isEscSeq = a.ioloopEscSeq(a.target, r, &a.arg) + return true + } + + switch r { + case CharEsc: + a.isEsc = true + case '[': + if a.isEsc { + a.arg = nil + a.isEscSeq = true + a.isEsc = false + break + } + fallthrough + default: + a.target.WriteRune(r) + a.wantFlush = true + } + return true +} + +func (a *ANSIWriterCtx) ioloopEscSeq(w *bufio.Writer, r rune, argptr *[]string) bool { + arg := *argptr + var err error + + if r >= 'A' && r <= 'D' { + count := short(GetInt(arg, 1)) + info, err := GetConsoleScreenBufferInfo() + if err != nil { + return false + } + switch r { + case 'A': // up + info.dwCursorPosition.y -= count + case 'B': // down + info.dwCursorPosition.y += count + case 'C': // right + info.dwCursorPosition.x += count + case 'D': // left + info.dwCursorPosition.x -= count + } + SetConsoleCursorPosition(&info.dwCursorPosition) + return false + } + + switch r { + case 'J': + killLines() + case 'K': + eraseLine() + case 'm': + color := word(0) + for _, item := range arg { + var c int + c, err = strconv.Atoi(item) + if err != nil { + w.WriteString("[" + strings.Join(arg, ";") + "m") + break + } + if c >= 30 && c < 40 { + color ^= COLOR_FINTENSITY + color |= ColorTableFg[c-30] + } else if c >= 40 && c < 50 { + color ^= COLOR_BINTENSITY + color |= ColorTableBg[c-40] + } else if c == 4 { + color |= COMMON_LVB_UNDERSCORE | ColorTableFg[7] + } else if c == 1 { + color |= COMMON_LVB_BOLD | COLOR_FINTENSITY + } else { // unknown code treat as reset + color = ColorTableFg[7] + } + } + if err != nil { + break 
+ } + kernel.SetConsoleTextAttribute(stdout, uintptr(color)) + case '\007': // set title + case ';': + if len(arg) == 0 || arg[len(arg)-1] != "" { + arg = append(arg, "") + *argptr = arg + } + return true + default: + if len(arg) == 0 { + arg = append(arg, "") + } + arg[len(arg)-1] += string(r) + *argptr = arg + return true + } + *argptr = nil + return false +} + +func (a *ANSIWriter) Write(b []byte) (int, error) { + a.Lock() + defer a.Unlock() + + off := 0 + for len(b) > off { + r, size := utf8.DecodeRune(b[off:]) + if size == 0 { + return off, io.ErrShortWrite + } + off += size + a.ctx.process(r) + } + a.ctx.Flush() + return off, nil +} + +func killLines() error { + sbi, err := GetConsoleScreenBufferInfo() + if err != nil { + return err + } + + size := (sbi.dwCursorPosition.y - sbi.dwSize.y) * sbi.dwSize.x + size += sbi.dwCursorPosition.x + + var written int + kernel.FillConsoleOutputAttribute(stdout, uintptr(ColorTableFg[7]), + uintptr(size), + sbi.dwCursorPosition.ptr(), + uintptr(unsafe.Pointer(&written)), + ) + return kernel.FillConsoleOutputCharacterW(stdout, uintptr(' '), + uintptr(size), + sbi.dwCursorPosition.ptr(), + uintptr(unsafe.Pointer(&written)), + ) +} + +func eraseLine() error { + sbi, err := GetConsoleScreenBufferInfo() + if err != nil { + return err + } + + size := sbi.dwSize.x + sbi.dwCursorPosition.x = 0 + var written int + return kernel.FillConsoleOutputCharacterW(stdout, uintptr(' '), + uintptr(size), + sbi.dwCursorPosition.ptr(), + uintptr(unsafe.Pointer(&written)), + ) +} diff --git a/src/control/vendor/github.com/desertbit/readline/complete.go b/src/control/vendor/github.com/desertbit/readline/complete.go new file mode 100644 index 00000000000..d1351f77d70 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/complete.go @@ -0,0 +1,285 @@ +package readline + +import ( + "bufio" + "bytes" + "fmt" + "io" +) + +type AutoCompleter interface { + // Readline will pass the whole line and current offset to it + // Completer need to 
pass all the candidates, and how long they shared the same characters in line + // Example: + // [go, git, git-shell, grep] + // Do("g", 1) => ["o", "it", "it-shell", "rep"], 1 + // Do("gi", 2) => ["t", "t-shell"], 2 + // Do("git", 3) => ["", "-shell"], 3 + Do(line []rune, pos int) (newLine [][]rune, length int) +} + +type TabCompleter struct{} + +func (t *TabCompleter) Do([]rune, int) ([][]rune, int) { + return [][]rune{[]rune("\t")}, 0 +} + +type opCompleter struct { + w io.Writer + op *Operation + width int + + inCompleteMode bool + inSelectMode bool + candidate [][]rune + candidateSource []rune + candidateOff int + candidateChoise int + candidateColNum int +} + +func newOpCompleter(w io.Writer, op *Operation, width int) *opCompleter { + return &opCompleter{ + w: w, + op: op, + width: width, + } +} + +func (o *opCompleter) doSelect() { + if len(o.candidate) == 1 { + o.op.buf.WriteRunes(o.candidate[0]) + o.ExitCompleteMode(false) + return + } + o.nextCandidate(1) + o.CompleteRefresh() +} + +func (o *opCompleter) nextCandidate(i int) { + o.candidateChoise += i + o.candidateChoise = o.candidateChoise % len(o.candidate) + if o.candidateChoise < 0 { + o.candidateChoise = len(o.candidate) + o.candidateChoise + } +} + +func (o *opCompleter) OnComplete() bool { + if o.width == 0 { + return false + } + if o.IsInCompleteSelectMode() { + o.doSelect() + return true + } + + buf := o.op.buf + rs := buf.Runes() + + if o.IsInCompleteMode() && o.candidateSource != nil && runes.Equal(rs, o.candidateSource) { + o.EnterCompleteSelectMode() + o.doSelect() + return true + } + + o.ExitCompleteSelectMode() + o.candidateSource = rs + newLines, offset := o.op.cfg.AutoComplete.Do(rs, buf.idx) + if len(newLines) == 0 { + o.ExitCompleteMode(false) + return true + } + + // only Aggregate candidates in non-complete mode + if !o.IsInCompleteMode() { + if len(newLines) == 1 { + buf.WriteRunes(newLines[0]) + o.ExitCompleteMode(false) + return true + } + + same, size := runes.Aggregate(newLines) 
+ if size > 0 { + buf.WriteRunes(same) + o.ExitCompleteMode(false) + return true + } + } + + o.EnterCompleteMode(offset, newLines) + return true +} + +func (o *opCompleter) IsInCompleteSelectMode() bool { + return o.inSelectMode +} + +func (o *opCompleter) IsInCompleteMode() bool { + return o.inCompleteMode +} + +func (o *opCompleter) HandleCompleteSelect(r rune) bool { + next := true + switch r { + case CharEnter, CharCtrlJ: + next = false + o.op.buf.WriteRunes(o.op.candidate[o.op.candidateChoise]) + o.ExitCompleteMode(false) + case CharLineStart: + num := o.candidateChoise % o.candidateColNum + o.nextCandidate(-num) + case CharLineEnd: + num := o.candidateColNum - o.candidateChoise%o.candidateColNum - 1 + o.candidateChoise += num + if o.candidateChoise >= len(o.candidate) { + o.candidateChoise = len(o.candidate) - 1 + } + case CharBackspace: + o.ExitCompleteSelectMode() + next = false + case CharTab, CharForward: + o.doSelect() + case CharBell, CharInterrupt: + o.ExitCompleteMode(true) + next = false + case CharNext: + tmpChoise := o.candidateChoise + o.candidateColNum + if tmpChoise >= o.getMatrixSize() { + tmpChoise -= o.getMatrixSize() + } else if tmpChoise >= len(o.candidate) { + tmpChoise += o.candidateColNum + tmpChoise -= o.getMatrixSize() + } + o.candidateChoise = tmpChoise + case CharBackward: + o.nextCandidate(-1) + case CharPrev: + tmpChoise := o.candidateChoise - o.candidateColNum + if tmpChoise < 0 { + tmpChoise += o.getMatrixSize() + if tmpChoise >= len(o.candidate) { + tmpChoise -= o.candidateColNum + } + } + o.candidateChoise = tmpChoise + default: + next = false + o.ExitCompleteSelectMode() + } + if next { + o.CompleteRefresh() + return true + } + return false +} + +func (o *opCompleter) getMatrixSize() int { + line := len(o.candidate) / o.candidateColNum + if len(o.candidate)%o.candidateColNum != 0 { + line++ + } + return line * o.candidateColNum +} + +func (o *opCompleter) OnWidthChange(newWidth int) { + o.width = newWidth +} + +func (o 
*opCompleter) CompleteRefresh() { + if !o.inCompleteMode { + return + } + lineCnt := o.op.buf.CursorLineCount() + colWidth := 0 + for _, c := range o.candidate { + w := runes.WidthAll(c) + if w > colWidth { + colWidth = w + } + } + colWidth += o.candidateOff + 1 + same := o.op.buf.RuneSlice(-o.candidateOff) + + // -1 to avoid reach the end of line + width := o.width - 1 + colNum := width / colWidth + if colNum != 0 { + colWidth += (width - (colWidth * colNum)) / colNum + } + + o.candidateColNum = colNum + buf := bufio.NewWriter(o.w) + buf.Write(bytes.Repeat([]byte("\n"), lineCnt)) + + colIdx := 0 + lines := 1 + buf.WriteString("\033[J") + for idx, c := range o.candidate { + inSelect := idx == o.candidateChoise && o.IsInCompleteSelectMode() + if inSelect { + buf.WriteString("\033[30;47m") + } + buf.WriteString(string(same)) + buf.WriteString(string(c)) + buf.Write(bytes.Repeat([]byte(" "), colWidth-len(c)-len(same))) + + if inSelect { + buf.WriteString("\033[0m") + } + + colIdx++ + if colIdx == colNum { + buf.WriteString("\n") + lines++ + colIdx = 0 + } + } + + // move back + fmt.Fprintf(buf, "\033[%dA\r", lineCnt-1+lines) + fmt.Fprintf(buf, "\033[%dC", o.op.buf.idx+o.op.buf.PromptLen()) + buf.Flush() +} + +func (o *opCompleter) aggCandidate(candidate [][]rune) int { + offset := 0 + for i := 0; i < len(candidate[0]); i++ { + for j := 0; j < len(candidate)-1; j++ { + if i > len(candidate[j]) { + goto aggregate + } + if candidate[j][i] != candidate[j+1][i] { + goto aggregate + } + } + offset = i + } +aggregate: + return offset +} + +func (o *opCompleter) EnterCompleteSelectMode() { + o.inSelectMode = true + o.candidateChoise = -1 + o.CompleteRefresh() +} + +func (o *opCompleter) EnterCompleteMode(offset int, candidate [][]rune) { + o.inCompleteMode = true + o.candidate = candidate + o.candidateOff = offset + o.CompleteRefresh() +} + +func (o *opCompleter) ExitCompleteSelectMode() { + o.inSelectMode = false + o.candidate = nil + o.candidateChoise = -1 + o.candidateOff 
= -1 + o.candidateSource = nil +} + +func (o *opCompleter) ExitCompleteMode(revent bool) { + o.inCompleteMode = false + o.ExitCompleteSelectMode() +} diff --git a/src/control/vendor/github.com/desertbit/readline/complete_helper.go b/src/control/vendor/github.com/desertbit/readline/complete_helper.go new file mode 100644 index 00000000000..58d724872bf --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/complete_helper.go @@ -0,0 +1,165 @@ +package readline + +import ( + "bytes" + "strings" +) + +// Caller type for dynamic completion +type DynamicCompleteFunc func(string) []string + +type PrefixCompleterInterface interface { + Print(prefix string, level int, buf *bytes.Buffer) + Do(line []rune, pos int) (newLine [][]rune, length int) + GetName() []rune + GetChildren() []PrefixCompleterInterface + SetChildren(children []PrefixCompleterInterface) +} + +type DynamicPrefixCompleterInterface interface { + PrefixCompleterInterface + IsDynamic() bool + GetDynamicNames(line []rune) [][]rune +} + +type PrefixCompleter struct { + Name []rune + Dynamic bool + Callback DynamicCompleteFunc + Children []PrefixCompleterInterface +} + +func (p *PrefixCompleter) Tree(prefix string) string { + buf := bytes.NewBuffer(nil) + p.Print(prefix, 0, buf) + return buf.String() +} + +func Print(p PrefixCompleterInterface, prefix string, level int, buf *bytes.Buffer) { + if strings.TrimSpace(string(p.GetName())) != "" { + buf.WriteString(prefix) + if level > 0 { + buf.WriteString("├") + buf.WriteString(strings.Repeat("─", (level*4)-2)) + buf.WriteString(" ") + } + buf.WriteString(string(p.GetName()) + "\n") + level++ + } + for _, ch := range p.GetChildren() { + ch.Print(prefix, level, buf) + } +} + +func (p *PrefixCompleter) Print(prefix string, level int, buf *bytes.Buffer) { + Print(p, prefix, level, buf) +} + +func (p *PrefixCompleter) IsDynamic() bool { + return p.Dynamic +} + +func (p *PrefixCompleter) GetName() []rune { + return p.Name +} + +func (p *PrefixCompleter) 
GetDynamicNames(line []rune) [][]rune { + var names = [][]rune{} + for _, name := range p.Callback(string(line)) { + names = append(names, []rune(name+" ")) + } + return names +} + +func (p *PrefixCompleter) GetChildren() []PrefixCompleterInterface { + return p.Children +} + +func (p *PrefixCompleter) SetChildren(children []PrefixCompleterInterface) { + p.Children = children +} + +func NewPrefixCompleter(pc ...PrefixCompleterInterface) *PrefixCompleter { + return PcItem("", pc...) +} + +func PcItem(name string, pc ...PrefixCompleterInterface) *PrefixCompleter { + name += " " + return &PrefixCompleter{ + Name: []rune(name), + Dynamic: false, + Children: pc, + } +} + +func PcItemDynamic(callback DynamicCompleteFunc, pc ...PrefixCompleterInterface) *PrefixCompleter { + return &PrefixCompleter{ + Callback: callback, + Dynamic: true, + Children: pc, + } +} + +func (p *PrefixCompleter) Do(line []rune, pos int) (newLine [][]rune, offset int) { + return doInternal(p, line, pos, line) +} + +func Do(p PrefixCompleterInterface, line []rune, pos int) (newLine [][]rune, offset int) { + return doInternal(p, line, pos, line) +} + +func doInternal(p PrefixCompleterInterface, line []rune, pos int, origLine []rune) (newLine [][]rune, offset int) { + line = runes.TrimSpaceLeft(line[:pos]) + goNext := false + var lineCompleter PrefixCompleterInterface + for _, child := range p.GetChildren() { + childNames := make([][]rune, 1) + + childDynamic, ok := child.(DynamicPrefixCompleterInterface) + if ok && childDynamic.IsDynamic() { + childNames = childDynamic.GetDynamicNames(origLine) + } else { + childNames[0] = child.GetName() + } + + for _, childName := range childNames { + if len(line) >= len(childName) { + if runes.HasPrefix(line, childName) { + if len(line) == len(childName) { + newLine = append(newLine, []rune{' '}) + } else { + newLine = append(newLine, childName) + } + offset = len(childName) + lineCompleter = child + goNext = true + } + } else { + if runes.HasPrefix(childName, 
line) { + newLine = append(newLine, childName[len(line):]) + offset = len(line) + lineCompleter = child + } + } + } + } + + if len(newLine) != 1 { + return + } + + tmpLine := make([]rune, 0, len(line)) + for i := offset; i < len(line); i++ { + if line[i] == ' ' { + continue + } + + tmpLine = append(tmpLine, line[i:]...) + return doInternal(lineCompleter, tmpLine, len(tmpLine), origLine) + } + + if goNext { + return doInternal(lineCompleter, nil, 0, origLine) + } + return +} diff --git a/src/control/vendor/github.com/desertbit/readline/complete_segment.go b/src/control/vendor/github.com/desertbit/readline/complete_segment.go new file mode 100644 index 00000000000..5ceadd80f97 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/complete_segment.go @@ -0,0 +1,82 @@ +package readline + +type SegmentCompleter interface { + // a + // |- a1 + // |--- a11 + // |- a2 + // b + // input: + // DoTree([], 0) [a, b] + // DoTree([a], 1) [a] + // DoTree([a, ], 0) [a1, a2] + // DoTree([a, a], 1) [a1, a2] + // DoTree([a, a1], 2) [a1] + // DoTree([a, a1, ], 0) [a11] + // DoTree([a, a1, a], 1) [a11] + DoSegment([][]rune, int) [][]rune +} + +type dumpSegmentCompleter struct { + f func([][]rune, int) [][]rune +} + +func (d *dumpSegmentCompleter) DoSegment(segment [][]rune, n int) [][]rune { + return d.f(segment, n) +} + +func SegmentFunc(f func([][]rune, int) [][]rune) AutoCompleter { + return &SegmentComplete{&dumpSegmentCompleter{f}} +} + +func SegmentAutoComplete(completer SegmentCompleter) *SegmentComplete { + return &SegmentComplete{ + SegmentCompleter: completer, + } +} + +type SegmentComplete struct { + SegmentCompleter +} + +func RetSegment(segments [][]rune, cands [][]rune, idx int) ([][]rune, int) { + ret := make([][]rune, 0, len(cands)) + lastSegment := segments[len(segments)-1] + for _, cand := range cands { + if !runes.HasPrefix(cand, lastSegment) { + continue + } + ret = append(ret, cand[len(lastSegment):]) + } + return ret, idx +} + +func 
SplitSegment(line []rune, pos int) ([][]rune, int) { + segs := [][]rune{} + lastIdx := -1 + line = line[:pos] + pos = 0 + for idx, l := range line { + if l == ' ' { + pos = 0 + segs = append(segs, line[lastIdx+1:idx]) + lastIdx = idx + } else { + pos++ + } + } + segs = append(segs, line[lastIdx+1:]) + return segs, pos +} + +func (c *SegmentComplete) Do(line []rune, pos int) (newLine [][]rune, offset int) { + + segment, idx := SplitSegment(line, pos) + + cands := c.DoSegment(segment, idx) + newLine, offset = RetSegment(segment, cands, idx) + for idx := range newLine { + newLine[idx] = append(newLine[idx], ' ') + } + return newLine, offset +} diff --git a/src/control/vendor/github.com/desertbit/readline/history.go b/src/control/vendor/github.com/desertbit/readline/history.go new file mode 100644 index 00000000000..6b17c464baf --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/history.go @@ -0,0 +1,330 @@ +package readline + +import ( + "bufio" + "container/list" + "fmt" + "os" + "strings" + "sync" +) + +type hisItem struct { + Source []rune + Version int64 + Tmp []rune +} + +func (h *hisItem) Clean() { + h.Source = nil + h.Tmp = nil +} + +type opHistory struct { + cfg *Config + history *list.List + historyVer int64 + current *list.Element + fd *os.File + fdLock sync.Mutex + enable bool +} + +func newOpHistory(cfg *Config) (o *opHistory) { + o = &opHistory{ + cfg: cfg, + history: list.New(), + enable: true, + } + return o +} + +func (o *opHistory) Reset() { + o.history = list.New() + o.current = nil +} + +func (o *opHistory) IsHistoryClosed() bool { + o.fdLock.Lock() + defer o.fdLock.Unlock() + return o.fd.Fd() == ^(uintptr(0)) +} + +func (o *opHistory) Init() { + if o.IsHistoryClosed() { + o.initHistory() + } +} + +func (o *opHistory) initHistory() { + if o.cfg.HistoryFile != "" { + o.historyUpdatePath(o.cfg.HistoryFile) + } +} + +// only called by newOpHistory +func (o *opHistory) historyUpdatePath(path string) { + o.fdLock.Lock() + defer 
o.fdLock.Unlock() + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return + } + o.fd = f + r := bufio.NewReader(o.fd) + total := 0 + for ; ; total++ { + line, err := r.ReadString('\n') + if err != nil { + break + } + // ignore the empty line + line = strings.TrimSpace(line) + if len(line) == 0 { + continue + } + o.Push([]rune(line)) + o.Compact() + } + if total > o.cfg.HistoryLimit { + o.rewriteLocked() + } + o.historyVer++ + o.Push(nil) + return +} + +func (o *opHistory) Compact() { + for o.history.Len() > o.cfg.HistoryLimit && o.history.Len() > 0 { + o.history.Remove(o.history.Front()) + } +} + +func (o *opHistory) Rewrite() { + o.fdLock.Lock() + defer o.fdLock.Unlock() + o.rewriteLocked() +} + +func (o *opHistory) rewriteLocked() { + if o.cfg.HistoryFile == "" { + return + } + + tmpFile := o.cfg.HistoryFile + ".tmp" + fd, err := os.OpenFile(tmpFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|os.O_APPEND, 0666) + if err != nil { + return + } + + buf := bufio.NewWriter(fd) + for elem := o.history.Front(); elem != nil; elem = elem.Next() { + buf.WriteString(string(elem.Value.(*hisItem).Source) + "\n") + } + buf.Flush() + + // replace history file + if err = os.Rename(tmpFile, o.cfg.HistoryFile); err != nil { + fd.Close() + return + } + + if o.fd != nil { + o.fd.Close() + } + // fd is write only, just satisfy what we need. 
+ o.fd = fd +} + +func (o *opHistory) Close() { + o.fdLock.Lock() + defer o.fdLock.Unlock() + if o.fd != nil { + o.fd.Close() + } +} + +func (o *opHistory) FindBck(isNewSearch bool, rs []rune, start int) (int, *list.Element) { + for elem := o.current; elem != nil; elem = elem.Prev() { + item := o.showItem(elem.Value) + if isNewSearch { + start += len(rs) + } + if elem == o.current { + if len(item) >= start { + item = item[:start] + } + } + idx := runes.IndexAllBckEx(item, rs, o.cfg.HistorySearchFold) + if idx < 0 { + continue + } + return idx, elem + } + return -1, nil +} + +func (o *opHistory) FindFwd(isNewSearch bool, rs []rune, start int) (int, *list.Element) { + for elem := o.current; elem != nil; elem = elem.Next() { + item := o.showItem(elem.Value) + if isNewSearch { + start -= len(rs) + if start < 0 { + start = 0 + } + } + if elem == o.current { + if len(item)-1 >= start { + item = item[start:] + } else { + continue + } + } + idx := runes.IndexAllEx(item, rs, o.cfg.HistorySearchFold) + if idx < 0 { + continue + } + if elem == o.current { + idx += start + } + return idx, elem + } + return -1, nil +} + +func (o *opHistory) showItem(obj interface{}) []rune { + item := obj.(*hisItem) + if item.Version == o.historyVer { + return item.Tmp + } + return item.Source +} + +func (o *opHistory) Prev() []rune { + if o.current == nil { + return nil + } + current := o.current.Prev() + if current == nil { + return nil + } + o.current = current + return runes.Copy(o.showItem(current.Value)) +} + +func (o *opHistory) Next() ([]rune, bool) { + if o.current == nil { + return nil, false + } + current := o.current.Next() + if current == nil { + return nil, false + } + + o.current = current + return runes.Copy(o.showItem(current.Value)), true +} + +// Disable the current history +func (o *opHistory) Disable() { + o.enable = false +} + +// Enable the current history +func (o *opHistory) Enable() { + o.enable = true +} + +func (o *opHistory) debug() { + Debug("-------") + for item 
:= o.history.Front(); item != nil; item = item.Next() { + Debug(fmt.Sprintf("%+v", item.Value)) + } +} + +// save history +func (o *opHistory) New(current []rune) (err error) { + + // history deactivated + if !o.enable { + return nil + } + + current = runes.Copy(current) + + // if just use last command without modify + // just clean lastest history + if back := o.history.Back(); back != nil { + prev := back.Prev() + if prev != nil { + if runes.Equal(current, prev.Value.(*hisItem).Source) { + o.current = o.history.Back() + o.current.Value.(*hisItem).Clean() + o.historyVer++ + return nil + } + } + } + + if len(current) == 0 { + o.current = o.history.Back() + if o.current != nil { + o.current.Value.(*hisItem).Clean() + o.historyVer++ + return nil + } + } + + if o.current != o.history.Back() { + // move history item to current command + currentItem := o.current.Value.(*hisItem) + // set current to last item + o.current = o.history.Back() + + current = runes.Copy(currentItem.Tmp) + } + + // err only can be a IO error, just report + err = o.Update(current, true) + + // push a new one to commit current command + o.historyVer++ + o.Push(nil) + return +} + +func (o *opHistory) Revert() { + o.historyVer++ + o.current = o.history.Back() +} + +func (o *opHistory) Update(s []rune, commit bool) (err error) { + o.fdLock.Lock() + defer o.fdLock.Unlock() + s = runes.Copy(s) + if o.current == nil { + o.Push(s) + o.Compact() + return + } + r := o.current.Value.(*hisItem) + r.Version = o.historyVer + if commit { + r.Source = s + if o.fd != nil { + // just report the error + _, err = o.fd.Write([]byte(string(r.Source) + "\n")) + } + } else { + r.Tmp = append(r.Tmp[:0], s...) 
+ } + o.current.Value = r + o.Compact() + return +} + +func (o *opHistory) Push(s []rune) { + s = runes.Copy(s) + elem := o.history.PushBack(&hisItem{Source: s}) + o.current = elem +} diff --git a/src/control/vendor/github.com/desertbit/readline/operation.go b/src/control/vendor/github.com/desertbit/readline/operation.go new file mode 100644 index 00000000000..4c31624f806 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/operation.go @@ -0,0 +1,531 @@ +package readline + +import ( + "errors" + "io" + "sync" +) + +var ( + ErrInterrupt = errors.New("Interrupt") +) + +type InterruptError struct { + Line []rune +} + +func (*InterruptError) Error() string { + return "Interrupted" +} + +type Operation struct { + m sync.Mutex + cfg *Config + t *Terminal + buf *RuneBuffer + outchan chan []rune + errchan chan error + w io.Writer + + history *opHistory + *opSearch + *opCompleter + *opPassword + *opVim +} + +func (o *Operation) SetBuffer(what string) { + o.buf.Set([]rune(what)) +} + +type wrapWriter struct { + r *Operation + t *Terminal + target io.Writer +} + +func (w *wrapWriter) Write(b []byte) (int, error) { + if !w.t.IsReading() { + return w.target.Write(b) + } + + var ( + n int + err error + ) + w.r.buf.Refresh(func() { + n, err = w.target.Write(b) + }) + + if w.r.IsSearchMode() { + w.r.SearchRefresh(-1) + } + if w.r.IsInCompleteMode() { + w.r.CompleteRefresh() + } + return n, err +} + +func NewOperation(t *Terminal, cfg *Config) *Operation { + width := cfg.FuncGetWidth() + op := &Operation{ + t: t, + buf: NewRuneBuffer(t, cfg.Prompt, cfg, width), + outchan: make(chan []rune), + errchan: make(chan error, 1), + } + op.w = op.buf.w + op.SetConfig(cfg) + op.opVim = newVimMode(op) + op.opCompleter = newOpCompleter(op.buf.w, op, width) + op.opPassword = newOpPassword(op) + op.cfg.FuncOnWidthChanged(func() { + newWidth := cfg.FuncGetWidth() + op.opCompleter.OnWidthChange(newWidth) + op.opSearch.OnWidthChange(newWidth) + op.buf.OnWidthChange(newWidth) + }) + 
go op.ioloop() + return op +} + +func (o *Operation) SetPrompt(s string) { + o.buf.SetPrompt(s) +} + +func (o *Operation) SetMaskRune(r rune) { + o.buf.SetMask(r) +} + +func (o *Operation) GetConfig() *Config { + o.m.Lock() + cfg := *o.cfg + o.m.Unlock() + return &cfg +} + +func (o *Operation) ioloop() { + for { + keepInSearchMode := false + keepInCompleteMode := false + r := o.t.ReadRune() + if o.GetConfig().FuncFilterInputRune != nil { + var process bool + r, process = o.GetConfig().FuncFilterInputRune(r) + if !process { + o.buf.Refresh(nil) // to refresh the line + continue // ignore this rune + } + } + + if r == 0 { // io.EOF + if o.buf.Len() == 0 { + o.buf.Clean() + select { + case o.errchan <- io.EOF: + } + break + } else { + // if stdin got io.EOF and there is something left in buffer, + // let's flush them by sending CharEnter. + // And we will got io.EOF int next loop. + r = CharEnter + } + } + isUpdateHistory := true + + if o.IsInCompleteSelectMode() { + keepInCompleteMode = o.HandleCompleteSelect(r) + if keepInCompleteMode { + continue + } + + o.buf.Refresh(nil) + switch r { + case CharEnter, CharCtrlJ: + o.history.Update(o.buf.Runes(), false) + fallthrough + case CharInterrupt: + o.t.KickRead() + fallthrough + case CharBell: + continue + } + } + + if o.IsEnableVimMode() { + r = o.HandleVim(r, o.t.ReadRune) + if r == 0 { + continue + } + } + + switch r { + case CharBell: + if o.IsSearchMode() { + o.ExitSearchMode(true) + o.buf.Refresh(nil) + } + if o.IsInCompleteMode() { + o.ExitCompleteMode(true) + o.buf.Refresh(nil) + } + case CharTab: + if o.GetConfig().AutoComplete == nil { + o.t.Bell() + break + } + if o.OnComplete() { + keepInCompleteMode = true + } else { + o.t.Bell() + break + } + + case CharBckSearch: + if !o.SearchMode(S_DIR_BCK) { + o.t.Bell() + break + } + keepInSearchMode = true + case CharCtrlU: + o.buf.KillFront() + case CharFwdSearch: + if !o.SearchMode(S_DIR_FWD) { + o.t.Bell() + break + } + keepInSearchMode = true + case CharKill: + 
o.buf.Kill() + keepInCompleteMode = true + case MetaForward: + o.buf.MoveToNextWord() + case CharTranspose: + o.buf.Transpose() + case MetaBackward: + o.buf.MoveToPrevWord() + case MetaDelete: + o.buf.DeleteWord() + case CharLineStart: + o.buf.MoveToLineStart() + case CharLineEnd: + o.buf.MoveToLineEnd() + case CharBackspace, CharCtrlH: + if o.IsSearchMode() { + o.SearchBackspace() + keepInSearchMode = true + break + } + + if o.buf.Len() == 0 { + o.t.Bell() + break + } + o.buf.Backspace() + if o.IsInCompleteMode() { + o.OnComplete() + } + case CharCtrlZ: + o.buf.Clean() + o.t.SleepToResume() + o.Refresh() + case CharCtrlL: + ClearScreen(o.w) + o.Refresh() + case MetaBackspace, CharCtrlW: + o.buf.BackEscapeWord() + case CharCtrlY: + o.buf.Yank() + case CharEnter, CharCtrlJ: + if o.IsSearchMode() { + o.ExitSearchMode(false) + } + o.buf.MoveToLineEnd() + var data []rune + if !o.GetConfig().UniqueEditLine { + o.buf.WriteRune('\n') + data = o.buf.Reset() + data = data[:len(data)-1] // trim \n + } else { + o.buf.Clean() + data = o.buf.Reset() + } + o.outchan <- data + if !o.GetConfig().DisableAutoSaveHistory { + // ignore IO error + _ = o.history.New(data) + } else { + isUpdateHistory = false + } + case CharBackward: + o.buf.MoveBackward() + case CharForward: + o.buf.MoveForward() + case CharPrev: + buf := o.history.Prev() + if buf != nil { + o.buf.Set(buf) + } else { + o.t.Bell() + } + case CharNext: + buf, ok := o.history.Next() + if ok { + o.buf.Set(buf) + } else { + o.t.Bell() + } + case CharDelete: + if o.buf.Len() > 0 || !o.IsNormalMode() { + o.t.KickRead() + if !o.buf.Delete() { + o.t.Bell() + } + break + } + + // treat as EOF + if !o.GetConfig().UniqueEditLine { + o.buf.WriteString(o.GetConfig().EOFPrompt + "\n") + } + o.buf.Reset() + isUpdateHistory = false + o.history.Revert() + o.errchan <- io.EOF + if o.GetConfig().UniqueEditLine { + o.buf.Clean() + } + case CharInterrupt: + if o.IsSearchMode() { + o.t.KickRead() + o.ExitSearchMode(true) + break + } + if 
o.IsInCompleteMode() { + o.t.KickRead() + o.ExitCompleteMode(true) + o.buf.Refresh(nil) + break + } + o.buf.MoveToLineEnd() + o.buf.Refresh(nil) + hint := o.GetConfig().InterruptPrompt + "\n" + if !o.GetConfig().UniqueEditLine { + o.buf.WriteString(hint) + } + remain := o.buf.Reset() + if !o.GetConfig().UniqueEditLine { + remain = remain[:len(remain)-len([]rune(hint))] + } + isUpdateHistory = false + o.history.Revert() + o.errchan <- &InterruptError{remain} + default: + if o.IsSearchMode() { + o.SearchChar(r) + keepInSearchMode = true + break + } + o.buf.WriteRune(r) + if o.IsInCompleteMode() { + o.OnComplete() + keepInCompleteMode = true + } + } + + listener := o.GetConfig().Listener + if listener != nil { + newLine, newPos, ok := listener.OnChange(o.buf.Runes(), o.buf.Pos(), r) + if ok { + o.buf.SetWithIdx(newPos, newLine) + } + } + + o.m.Lock() + if !keepInSearchMode && o.IsSearchMode() { + o.ExitSearchMode(false) + o.buf.Refresh(nil) + } else if o.IsInCompleteMode() { + if !keepInCompleteMode { + o.ExitCompleteMode(false) + o.Refresh() + } else { + o.buf.Refresh(nil) + o.CompleteRefresh() + } + } + if isUpdateHistory && !o.IsSearchMode() { + // it will cause null history + o.history.Update(o.buf.Runes(), false) + } + o.m.Unlock() + } +} + +func (o *Operation) Stderr() io.Writer { + return &wrapWriter{target: o.GetConfig().Stderr, r: o, t: o.t} +} + +func (o *Operation) Stdout() io.Writer { + return &wrapWriter{target: o.GetConfig().Stdout, r: o, t: o.t} +} + +func (o *Operation) String() (string, error) { + r, err := o.Runes() + return string(r), err +} + +func (o *Operation) Runes() ([]rune, error) { + o.t.EnterRawMode() + defer o.t.ExitRawMode() + + listener := o.GetConfig().Listener + if listener != nil { + listener.OnChange(nil, 0, 0) + } + + o.buf.Refresh(nil) // print prompt + o.t.KickRead() + select { + case r := <-o.outchan: + return r, nil + case err := <-o.errchan: + if e, ok := err.(*InterruptError); ok { + return e.Line, ErrInterrupt + } + return 
nil, err + } +} + +func (o *Operation) PasswordEx(prompt string, l Listener) ([]byte, error) { + cfg := o.GenPasswordConfig() + cfg.Prompt = prompt + cfg.Listener = l + return o.PasswordWithConfig(cfg) +} + +func (o *Operation) GenPasswordConfig() *Config { + return o.opPassword.PasswordConfig() +} + +func (o *Operation) PasswordWithConfig(cfg *Config) ([]byte, error) { + if err := o.opPassword.EnterPasswordMode(cfg); err != nil { + return nil, err + } + defer o.opPassword.ExitPasswordMode() + return o.Slice() +} + +func (o *Operation) Password(prompt string) ([]byte, error) { + return o.PasswordEx(prompt, nil) +} + +// SetTitle sets the terminal window title by writing the xterm OSC 2 +// sequence (ESC ] 2 ; title BEL) to the output writer. An ESC [ prefix +// would start a CSI sequence instead, so the title text would be parsed +// as control parameters rather than applied as the title. +func (o *Operation) SetTitle(t string) { + o.w.Write([]byte("\033]2;" + t + "\007")) +} + +func (o *Operation) Slice() ([]byte, error) { + r, err := o.Runes() + if err != nil { + return nil, err + } + return []byte(string(r)), nil +} + +func (o *Operation) Close() { + o.history.Close() +} + +func (o *Operation) SetHistoryPath(path string) { + if o.history != nil { + o.history.Close() + } + o.cfg.HistoryFile = path + o.history = newOpHistory(o.cfg) +} + +func (o *Operation) IsNormalMode() bool { + return !o.IsInCompleteMode() && !o.IsSearchMode() +} + +func (op *Operation) SetConfig(cfg *Config) (*Config, error) { + op.m.Lock() + defer op.m.Unlock() + if op.cfg == cfg { + return op.cfg, nil + } + if err := cfg.Init(); err != nil { + return op.cfg, err + } + old := op.cfg + op.cfg = cfg + op.SetPrompt(cfg.Prompt) + op.SetMaskRune(cfg.MaskRune) + op.buf.SetConfig(cfg) + width := op.cfg.FuncGetWidth() + + if cfg.opHistory == nil { + op.SetHistoryPath(cfg.HistoryFile) + cfg.opHistory = op.history + cfg.opSearch = newOpSearch(op.buf.w, op.buf, op.history, cfg, width) + } + op.history = cfg.opHistory + + // SetHistoryPath will close opHistory which already exists + // so if we use it next time, we need to reopen it by `InitHistory()` + op.history.Init() + + if op.cfg.AutoComplete != nil { + op.opCompleter = newOpCompleter(op.buf.w, op, width) + } +
op.opSearch = cfg.opSearch + return old, nil +} + +func (o *Operation) ResetHistory() { + o.history.Reset() +} + +// if err is not nil, it just mean it fail to write to file +// other things goes fine. +func (o *Operation) SaveHistory(content string) error { + return o.history.New([]rune(content)) +} + +func (o *Operation) Refresh() { + if o.t.IsReading() { + o.buf.Refresh(nil) + } +} + +func (o *Operation) Clean() { + o.buf.Clean() +} + +func FuncListener(f func(line []rune, pos int, key rune) (newLine []rune, newPos int, ok bool)) Listener { + return &DumpListener{f: f} +} + +type DumpListener struct { + f func(line []rune, pos int, key rune) (newLine []rune, newPos int, ok bool) +} + +func (d *DumpListener) OnChange(line []rune, pos int, key rune) (newLine []rune, newPos int, ok bool) { + return d.f(line, pos, key) +} + +type Listener interface { + OnChange(line []rune, pos int, key rune) (newLine []rune, newPos int, ok bool) +} + +type Painter interface { + Paint(line []rune, pos int) []rune +} + +type defaultPainter struct{} + +func (p *defaultPainter) Paint(line []rune, _ int) []rune { + return line +} diff --git a/src/control/vendor/github.com/desertbit/readline/password.go b/src/control/vendor/github.com/desertbit/readline/password.go new file mode 100644 index 00000000000..414288c2a50 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/password.go @@ -0,0 +1,33 @@ +package readline + +type opPassword struct { + o *Operation + backupCfg *Config +} + +func newOpPassword(o *Operation) *opPassword { + return &opPassword{o: o} +} + +func (o *opPassword) ExitPasswordMode() { + o.o.SetConfig(o.backupCfg) + o.backupCfg = nil +} + +func (o *opPassword) EnterPasswordMode(cfg *Config) (err error) { + o.backupCfg, err = o.o.SetConfig(cfg) + return +} + +func (o *opPassword) PasswordConfig() *Config { + return &Config{ + EnableMask: true, + InterruptPrompt: "\n", + EOFPrompt: "\n", + HistoryLimit: -1, + Painter: &defaultPainter{}, + + Stdout: 
o.o.cfg.Stdout, + Stderr: o.o.cfg.Stderr, + } +} diff --git a/src/control/vendor/github.com/desertbit/readline/rawreader_windows.go b/src/control/vendor/github.com/desertbit/readline/rawreader_windows.go new file mode 100644 index 00000000000..073ef150a59 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/rawreader_windows.go @@ -0,0 +1,125 @@ +// +build windows + +package readline + +import "unsafe" + +const ( + VK_CANCEL = 0x03 + VK_BACK = 0x08 + VK_TAB = 0x09 + VK_RETURN = 0x0D + VK_SHIFT = 0x10 + VK_CONTROL = 0x11 + VK_MENU = 0x12 + VK_ESCAPE = 0x1B + VK_LEFT = 0x25 + VK_UP = 0x26 + VK_RIGHT = 0x27 + VK_DOWN = 0x28 + VK_DELETE = 0x2E + VK_LSHIFT = 0xA0 + VK_RSHIFT = 0xA1 + VK_LCONTROL = 0xA2 + VK_RCONTROL = 0xA3 +) + +// RawReader translate input record to ANSI escape sequence. +// To provides same behavior as unix terminal. +type RawReader struct { + ctrlKey bool + altKey bool +} + +func NewRawReader() *RawReader { + r := new(RawReader) + return r +} + +// only process one action in one read +func (r *RawReader) Read(buf []byte) (int, error) { + ir := new(_INPUT_RECORD) + var read int + var err error +next: + err = kernel.ReadConsoleInputW(stdin, + uintptr(unsafe.Pointer(ir)), + 1, + uintptr(unsafe.Pointer(&read)), + ) + if err != nil { + return 0, err + } + if ir.EventType != EVENT_KEY { + goto next + } + ker := (*_KEY_EVENT_RECORD)(unsafe.Pointer(&ir.Event[0])) + if ker.bKeyDown == 0 { // keyup + if r.ctrlKey || r.altKey { + switch ker.wVirtualKeyCode { + case VK_RCONTROL, VK_LCONTROL: + r.ctrlKey = false + case VK_MENU: //alt + r.altKey = false + } + } + goto next + } + + if ker.unicodeChar == 0 { + var target rune + switch ker.wVirtualKeyCode { + case VK_RCONTROL, VK_LCONTROL: + r.ctrlKey = true + case VK_MENU: //alt + r.altKey = true + case VK_LEFT: + target = CharBackward + case VK_RIGHT: + target = CharForward + case VK_UP: + target = CharPrev + case VK_DOWN: + target = CharNext + } + if target != 0 { + return r.write(buf, target) + } 
+ goto next + } + char := rune(ker.unicodeChar) + if r.ctrlKey { + switch char { + case 'A': + char = CharLineStart + case 'E': + char = CharLineEnd + case 'R': + char = CharBckSearch + case 'S': + char = CharFwdSearch + } + } else if r.altKey { + switch char { + case VK_BACK: + char = CharBackspace + } + return r.writeEsc(buf, char) + } + return r.write(buf, char) +} + +func (r *RawReader) writeEsc(b []byte, char rune) (int, error) { + b[0] = '\033' + n := copy(b[1:], []byte(string(char))) + return n + 1, nil +} + +func (r *RawReader) write(b []byte, char rune) (int, error) { + n := copy(b, []byte(string(char))) + return n, nil +} + +func (r *RawReader) Close() error { + return nil +} diff --git a/src/control/vendor/github.com/desertbit/readline/readline.go b/src/control/vendor/github.com/desertbit/readline/readline.go new file mode 100644 index 00000000000..0e7aca06d5a --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/readline.go @@ -0,0 +1,326 @@ +// Readline is a pure go implementation for GNU-Readline kind library. 
+// +// example: +// rl, err := readline.New("> ") +// if err != nil { +// panic(err) +// } +// defer rl.Close() +// +// for { +// line, err := rl.Readline() +// if err != nil { // io.EOF +// break +// } +// println(line) +// } +// +package readline + +import "io" + +type Instance struct { + Config *Config + Terminal *Terminal + Operation *Operation +} + +type Config struct { + // prompt supports ANSI escape sequence, so we can color some characters even in windows + Prompt string + + // readline will persist historys to file where HistoryFile specified + HistoryFile string + // specify the max length of historys, it's 500 by default, set it to -1 to disable history + HistoryLimit int + DisableAutoSaveHistory bool + // enable case-insensitive history searching + HistorySearchFold bool + + // AutoCompleter will called once user press TAB + AutoComplete AutoCompleter + + // Any key press will pass to Listener + // NOTE: Listener will be triggered by (nil, 0, 0) immediately + Listener Listener + + Painter Painter + + // If VimMode is true, readline will in vim.insert mode by default + VimMode bool + + InterruptPrompt string + EOFPrompt string + + FuncGetWidth func() int + + Stdin io.ReadCloser + StdinWriter io.Writer + Stdout io.Writer + Stderr io.Writer + + EnableMask bool + MaskRune rune + + // erase the editing line after user submited it + // it use in IM usually. 
+ UniqueEditLine bool + + // filter input runes (may be used to disable CtrlZ or for translating some keys to different actions) + // -> output = new (translated) rune and true/false if continue with processing this one + FuncFilterInputRune func(rune) (rune, bool) + + // force use interactive even stdout is not a tty + FuncIsTerminal func() bool + FuncMakeRaw func() error + FuncExitRaw func() error + FuncOnWidthChanged func(func()) + ForceUseInteractive bool + + // private fields + inited bool + opHistory *opHistory + opSearch *opSearch +} + +func (c *Config) useInteractive() bool { + if c.ForceUseInteractive { + return true + } + return c.FuncIsTerminal() +} + +func (c *Config) Init() error { + if c.inited { + return nil + } + c.inited = true + if c.Stdin == nil { + c.Stdin = NewCancelableStdin(Stdin) + } + + c.Stdin, c.StdinWriter = NewFillableStdin(c.Stdin) + + if c.Stdout == nil { + c.Stdout = Stdout + } + if c.Stderr == nil { + c.Stderr = Stderr + } + if c.HistoryLimit == 0 { + c.HistoryLimit = 500 + } + + if c.InterruptPrompt == "" { + c.InterruptPrompt = "^C" + } else if c.InterruptPrompt == "\n" { + c.InterruptPrompt = "" + } + if c.EOFPrompt == "" { + c.EOFPrompt = "^D" + } else if c.EOFPrompt == "\n" { + c.EOFPrompt = "" + } + + if c.AutoComplete == nil { + c.AutoComplete = &TabCompleter{} + } + if c.FuncGetWidth == nil { + c.FuncGetWidth = GetScreenWidth + } + if c.FuncIsTerminal == nil { + c.FuncIsTerminal = DefaultIsTerminal + } + rm := new(RawMode) + if c.FuncMakeRaw == nil { + c.FuncMakeRaw = rm.Enter + } + if c.FuncExitRaw == nil { + c.FuncExitRaw = rm.Exit + } + if c.FuncOnWidthChanged == nil { + c.FuncOnWidthChanged = DefaultOnWidthChanged + } + + return nil +} + +func (c Config) Clone() *Config { + c.opHistory = nil + c.opSearch = nil + return &c +} + +func (c *Config) SetListener(f func(line []rune, pos int, key rune) (newLine []rune, newPos int, ok bool)) { + c.Listener = FuncListener(f) +} + +func (c *Config) SetPainter(p Painter) { + 
c.Painter = p +} + +func NewEx(cfg *Config) (*Instance, error) { + t, err := NewTerminal(cfg) + if err != nil { + return nil, err + } + rl := t.Readline() + if cfg.Painter == nil { + cfg.Painter = &defaultPainter{} + } + return &Instance{ + Config: cfg, + Terminal: t, + Operation: rl, + }, nil +} + +func New(prompt string) (*Instance, error) { + return NewEx(&Config{Prompt: prompt}) +} + +func (i *Instance) ResetHistory() { + i.Operation.ResetHistory() +} + +func (i *Instance) SetPrompt(s string) { + i.Operation.SetPrompt(s) +} + +func (i *Instance) SetMaskRune(r rune) { + i.Operation.SetMaskRune(r) +} + +// change history persistence in runtime +func (i *Instance) SetHistoryPath(p string) { + i.Operation.SetHistoryPath(p) +} + +// readline will refresh automatic when write through Stdout() +func (i *Instance) Stdout() io.Writer { + return i.Operation.Stdout() +} + +// readline will refresh automatic when write through Stdout() +func (i *Instance) Stderr() io.Writer { + return i.Operation.Stderr() +} + +// switch VimMode in runtime +func (i *Instance) SetVimMode(on bool) { + i.Operation.SetVimMode(on) +} + +func (i *Instance) IsVimMode() bool { + return i.Operation.IsEnableVimMode() +} + +func (i *Instance) GenPasswordConfig() *Config { + return i.Operation.GenPasswordConfig() +} + +// we can generate a config by `i.GenPasswordConfig()` +func (i *Instance) ReadPasswordWithConfig(cfg *Config) ([]byte, error) { + return i.Operation.PasswordWithConfig(cfg) +} + +func (i *Instance) ReadPasswordEx(prompt string, l Listener) ([]byte, error) { + return i.Operation.PasswordEx(prompt, l) +} + +func (i *Instance) ReadPassword(prompt string) ([]byte, error) { + return i.Operation.Password(prompt) +} + +type Result struct { + Line string + Error error +} + +func (l *Result) CanContinue() bool { + return len(l.Line) != 0 && l.Error == ErrInterrupt +} + +func (l *Result) CanBreak() bool { + return !l.CanContinue() && l.Error != nil +} + +func (i *Instance) Line() *Result { + 
ret, err := i.Readline() + return &Result{ret, err} +} + +// err is one of (nil, io.EOF, readline.ErrInterrupt) +func (i *Instance) Readline() (string, error) { + return i.Operation.String() +} + +func (i *Instance) ReadlineWithDefault(what string) (string, error) { + i.Operation.SetBuffer(what) + return i.Operation.String() +} + +func (i *Instance) SaveHistory(content string) error { + return i.Operation.SaveHistory(content) +} + +// same as readline +func (i *Instance) ReadSlice() ([]byte, error) { + return i.Operation.Slice() +} + +// we must make sure that call Close() before process exit. +func (i *Instance) Close() error { + if err := i.Terminal.Close(); err != nil { + return err + } + i.Config.Stdin.Close() + i.Operation.Close() + return nil +} +func (i *Instance) Clean() { + i.Operation.Clean() +} + +func (i *Instance) Write(b []byte) (int, error) { + return i.Stdout().Write(b) +} + +// WriteStdin prefill the next Stdin fetch +// Next time you call ReadLine() this value will be writen before the user input +// ie : +// i := readline.New() +// i.WriteStdin([]byte("test")) +// _, _= i.Readline() +// +// gives +// +// > test[cursor] +func (i *Instance) WriteStdin(val []byte) (int, error) { + return i.Terminal.WriteStdin(val) +} + +func (i *Instance) SetConfig(cfg *Config) *Config { + if i.Config == cfg { + return cfg + } + old := i.Config + i.Config = cfg + i.Operation.SetConfig(cfg) + i.Terminal.SetConfig(cfg) + return old +} + +func (i *Instance) Refresh() { + i.Operation.Refresh() +} + +// HistoryDisable the save of the commands into the history +func (i *Instance) HistoryDisable() { + i.Operation.history.Disable() +} + +// HistoryEnable the save of the commands into the history (default on) +func (i *Instance) HistoryEnable() { + i.Operation.history.Enable() +} diff --git a/src/control/vendor/github.com/desertbit/readline/remote.go b/src/control/vendor/github.com/desertbit/readline/remote.go new file mode 100644 index 00000000000..74dbf569022 --- /dev/null 
+++ b/src/control/vendor/github.com/desertbit/readline/remote.go @@ -0,0 +1,475 @@ +package readline + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "io" + "net" + "os" + "sync" + "sync/atomic" +) + +type MsgType int16 + +const ( + T_DATA = MsgType(iota) + T_WIDTH + T_WIDTH_REPORT + T_ISTTY_REPORT + T_RAW + T_ERAW // exit raw + T_EOF +) + +type RemoteSvr struct { + eof int32 + closed int32 + width int32 + reciveChan chan struct{} + writeChan chan *writeCtx + conn net.Conn + isTerminal bool + funcWidthChan func() + stopChan chan struct{} + + dataBufM sync.Mutex + dataBuf bytes.Buffer +} + +type writeReply struct { + n int + err error +} + +type writeCtx struct { + msg *Message + reply chan *writeReply +} + +func newWriteCtx(msg *Message) *writeCtx { + return &writeCtx{ + msg: msg, + reply: make(chan *writeReply), + } +} + +func NewRemoteSvr(conn net.Conn) (*RemoteSvr, error) { + rs := &RemoteSvr{ + width: -1, + conn: conn, + writeChan: make(chan *writeCtx), + reciveChan: make(chan struct{}), + stopChan: make(chan struct{}), + } + buf := bufio.NewReader(rs.conn) + + if err := rs.init(buf); err != nil { + return nil, err + } + + go rs.readLoop(buf) + go rs.writeLoop() + return rs, nil +} + +func (r *RemoteSvr) init(buf *bufio.Reader) error { + m, err := ReadMessage(buf) + if err != nil { + return err + } + // receive isTerminal + if m.Type != T_ISTTY_REPORT { + return fmt.Errorf("unexpected init message") + } + r.GotIsTerminal(m.Data) + + // receive width + m, err = ReadMessage(buf) + if err != nil { + return err + } + if m.Type != T_WIDTH_REPORT { + return fmt.Errorf("unexpected init message") + } + r.GotReportWidth(m.Data) + + return nil +} + +func (r *RemoteSvr) HandleConfig(cfg *Config) { + cfg.Stderr = r + cfg.Stdout = r + cfg.Stdin = r + cfg.FuncExitRaw = r.ExitRawMode + cfg.FuncIsTerminal = r.IsTerminal + cfg.FuncMakeRaw = r.EnterRawMode + cfg.FuncExitRaw = r.ExitRawMode + cfg.FuncGetWidth = r.GetWidth + cfg.FuncOnWidthChanged = func(f func()) { + 
r.funcWidthChan = f + } +} + +func (r *RemoteSvr) IsTerminal() bool { + return r.isTerminal +} + +func (r *RemoteSvr) checkEOF() error { + if atomic.LoadInt32(&r.eof) == 1 { + return io.EOF + } + return nil +} + +func (r *RemoteSvr) Read(b []byte) (int, error) { + r.dataBufM.Lock() + n, err := r.dataBuf.Read(b) + r.dataBufM.Unlock() + if n == 0 { + if err := r.checkEOF(); err != nil { + return 0, err + } + } + + if n == 0 && err == io.EOF { + <-r.reciveChan + r.dataBufM.Lock() + n, err = r.dataBuf.Read(b) + r.dataBufM.Unlock() + } + if n == 0 { + if err := r.checkEOF(); err != nil { + return 0, err + } + } + + return n, err +} + +func (r *RemoteSvr) writeMsg(m *Message) error { + ctx := newWriteCtx(m) + r.writeChan <- ctx + reply := <-ctx.reply + return reply.err +} + +func (r *RemoteSvr) Write(b []byte) (int, error) { + ctx := newWriteCtx(NewMessage(T_DATA, b)) + r.writeChan <- ctx + reply := <-ctx.reply + return reply.n, reply.err +} + +func (r *RemoteSvr) EnterRawMode() error { + return r.writeMsg(NewMessage(T_RAW, nil)) +} + +func (r *RemoteSvr) ExitRawMode() error { + return r.writeMsg(NewMessage(T_ERAW, nil)) +} + +// writeLoop takes queued write requests from writeChan, writes each message +// to conn and sends the result on the request's reply channel. It returns, +// closing the server via the deferred Close, when stopChan is closed or +// writeChan is closed. +func (r *RemoteSvr) writeLoop() { + defer r.Close() + +loop: + for { + select { + case ctx, ok := <-r.writeChan: + if !ok { + // a bare break only leaves the select; the closed + // channel would then be received from forever + break loop + } + n, err := ctx.msg.WriteTo(r.conn) + ctx.reply <- &writeReply{n, err} + case <-r.stopChan: + break loop + } + } +} + +func (r *RemoteSvr) Close() error { + if atomic.CompareAndSwapInt32(&r.closed, 0, 1) { + close(r.stopChan) + r.conn.Close() + } + return nil +} + +func (r *RemoteSvr) readLoop(buf *bufio.Reader) { + defer r.Close() + for { + m, err := ReadMessage(buf) + if err != nil { + break + } + switch m.Type { + case T_EOF: + atomic.StoreInt32(&r.eof, 1) + select { + case r.reciveChan <- struct{}{}: + default: + } + case T_DATA: + r.dataBufM.Lock() + r.dataBuf.Write(m.Data) + r.dataBufM.Unlock() + select { + case r.reciveChan <- struct{}{}: + default: + } + case T_WIDTH_REPORT: + r.GotReportWidth(m.Data) +
case T_ISTTY_REPORT: + r.GotIsTerminal(m.Data) + } + } +} + +// GotIsTerminal records the peer's is-a-terminal report. The payload is a +// big-endian uint16 (0 = not a terminal); a payload shorter than two bytes +// is ignored instead of panicking on data received from the network. +func (r *RemoteSvr) GotIsTerminal(data []byte) { + if len(data) < 2 { + return + } + if binary.BigEndian.Uint16(data) == 0 { + r.isTerminal = false + } else { + r.isTerminal = true + } +} + +// GotReportWidth stores the peer's reported screen width (big-endian uint16) +// and invokes the registered width-change callback, if any. A payload shorter +// than two bytes is ignored instead of panicking. +func (r *RemoteSvr) GotReportWidth(data []byte) { + if len(data) < 2 { + return + } + atomic.StoreInt32(&r.width, int32(binary.BigEndian.Uint16(data))) + if r.funcWidthChan != nil { + r.funcWidthChan() + } +} + +func (r *RemoteSvr) GetWidth() int { + return int(atomic.LoadInt32(&r.width)) +} + +// ----------------------------------------------------------------------------- + +type Message struct { + Type MsgType + Data []byte +} + +// ReadMessage decodes one frame from r: a big-endian int32 length, a +// big-endian int16 message type, then length-2 bytes of payload. The length +// counts the two type bytes, so any value below 2 is malformed and is +// reported as an error rather than reaching make() with a negative size. +func ReadMessage(r io.Reader) (*Message, error) { + m := new(Message) + var length int32 + if err := binary.Read(r, binary.BigEndian, &length); err != nil { + return nil, err + } + if length < 2 { + return nil, fmt.Errorf("invalid message length: %d", length) + } + if err := binary.Read(r, binary.BigEndian, &m.Type); err != nil { + return nil, err + } + m.Data = make([]byte, int(length)-2) + if _, err := io.ReadFull(r, m.Data); err != nil { + return nil, err + } + return m, nil +} + +func NewMessage(t MsgType, data []byte) *Message { + return &Message{t, data} +} + +func (m *Message) WriteTo(w io.Writer) (int, error) { + buf := bytes.NewBuffer(make([]byte, 0, len(m.Data)+2+4)) + binary.Write(buf, binary.BigEndian, int32(len(m.Data)+2)) + binary.Write(buf, binary.BigEndian, m.Type) + buf.Write(m.Data) + n, err := buf.WriteTo(w) + return int(n), err +} + +// ----------------------------------------------------------------------------- + +type RemoteCli struct { + conn net.Conn + raw RawMode + receiveChan chan struct{} + inited int32 + isTerminal *bool + + data bytes.Buffer + dataM sync.Mutex +} + +func NewRemoteCli(conn net.Conn) (*RemoteCli, error) { + r := &RemoteCli{ + conn: conn, + receiveChan: make(chan struct{}), + } + return r, nil +} + +func (r *RemoteCli) MarkIsTerminal(is bool) { + r.isTerminal = &is +} + +func (r *RemoteCli) init() error { + if !atomic.CompareAndSwapInt32(&r.inited, 0, 1) { + return nil + } + + if err :=
r.reportIsTerminal(); err != nil { + return err + } + + if err := r.reportWidth(); err != nil { + return err + } + + // register sig for width changed + DefaultOnWidthChanged(func() { + r.reportWidth() + }) + return nil +} + +func (r *RemoteCli) writeMsg(m *Message) error { + r.dataM.Lock() + _, err := m.WriteTo(r.conn) + r.dataM.Unlock() + return err +} + +func (r *RemoteCli) Write(b []byte) (int, error) { + m := NewMessage(T_DATA, b) + r.dataM.Lock() + _, err := m.WriteTo(r.conn) + r.dataM.Unlock() + return len(b), err +} + +func (r *RemoteCli) reportWidth() error { + screenWidth := GetScreenWidth() + data := make([]byte, 2) + binary.BigEndian.PutUint16(data, uint16(screenWidth)) + msg := NewMessage(T_WIDTH_REPORT, data) + + if err := r.writeMsg(msg); err != nil { + return err + } + return nil +} + +func (r *RemoteCli) reportIsTerminal() error { + var isTerminal bool + if r.isTerminal != nil { + isTerminal = *r.isTerminal + } else { + isTerminal = DefaultIsTerminal() + } + data := make([]byte, 2) + if isTerminal { + binary.BigEndian.PutUint16(data, 1) + } else { + binary.BigEndian.PutUint16(data, 0) + } + msg := NewMessage(T_ISTTY_REPORT, data) + if err := r.writeMsg(msg); err != nil { + return err + } + return nil +} + +func (r *RemoteCli) readLoop() { + buf := bufio.NewReader(r.conn) + for { + msg, err := ReadMessage(buf) + if err != nil { + break + } + switch msg.Type { + case T_ERAW: + r.raw.Exit() + case T_RAW: + r.raw.Enter() + case T_DATA: + os.Stdout.Write(msg.Data) + } + } +} + +func (r *RemoteCli) ServeBy(source io.Reader) error { + if err := r.init(); err != nil { + return err + } + + go func() { + defer r.Close() + for { + n, _ := io.Copy(r, source) + if n == 0 { + break + } + } + }() + defer r.raw.Exit() + r.readLoop() + return nil +} + +func (r *RemoteCli) Close() { + r.writeMsg(NewMessage(T_EOF, nil)) +} + +func (r *RemoteCli) Serve() error { + return r.ServeBy(os.Stdin) +} + +func ListenRemote(n, addr string, cfg *Config, h func(*Instance), 
onListen ...func(net.Listener) error) error { + ln, err := net.Listen(n, addr) + if err != nil { + return err + } + if len(onListen) > 0 { + if err := onListen[0](ln); err != nil { + return err + } + } + for { + conn, err := ln.Accept() + if err != nil { + break + } + go func() { + defer conn.Close() + rl, err := HandleConn(*cfg, conn) + if err != nil { + return + } + h(rl) + }() + } + return nil +} + +func HandleConn(cfg Config, conn net.Conn) (*Instance, error) { + r, err := NewRemoteSvr(conn) + if err != nil { + return nil, err + } + r.HandleConfig(&cfg) + + rl, err := NewEx(&cfg) + if err != nil { + return nil, err + } + return rl, nil +} + +func DialRemote(n, addr string) error { + conn, err := net.Dial(n, addr) + if err != nil { + return err + } + defer conn.Close() + + cli, err := NewRemoteCli(conn) + if err != nil { + return err + } + return cli.Serve() +} diff --git a/src/control/vendor/github.com/desertbit/readline/runebuf.go b/src/control/vendor/github.com/desertbit/readline/runebuf.go new file mode 100644 index 00000000000..727c250e4ed --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/runebuf.go @@ -0,0 +1,630 @@ +package readline + +import ( + "bufio" + "bytes" + "io" + "strconv" + "strings" + "sync" +) + +type runeBufferBck struct { + buf []rune + idx int +} + +type RuneBuffer struct { + buf []rune + idx int + prompt []rune + w io.Writer + + hadClean bool + interactive bool + cfg *Config + + width int + + bck *runeBufferBck + + offset string + + lastKill []rune + + sync.Mutex +} + +func (r* RuneBuffer) pushKill(text []rune) { + r.lastKill = append([]rune{}, text...) 
+}
+
+// OnWidthChange records the new terminal width.
+func (r *RuneBuffer) OnWidthChange(newWidth int) {
+	r.Lock()
+	r.width = newWidth
+	r.Unlock()
+}
+
+// Backup snapshots the current buffer contents and cursor position.
+func (r *RuneBuffer) Backup() {
+	r.Lock()
+	// Store a copy rather than the slice itself: several editing methods
+	// modify r.buf in place (append within capacity, copy, index writes),
+	// which would silently rewrite an aliased snapshot.
+	r.bck = &runeBufferBck{runes.Copy(r.buf), r.idx}
+	r.Unlock()
+}
+
+// Restore puts back the buffer and cursor saved by Backup and redraws the
+// line; it is a no-op when no backup exists.
+func (r *RuneBuffer) Restore() {
+	r.Refresh(func() {
+		if r.bck == nil {
+			return
+		}
+		r.buf = r.bck.buf
+		r.idx = r.bck.idx
+	})
+}
+
+// NewRuneBuffer creates a buffer that renders to w with the given prompt,
+// configuration and terminal width.
+func NewRuneBuffer(w io.Writer, prompt string, cfg *Config, width int) *RuneBuffer {
+	rb := &RuneBuffer{
+		w:           w,
+		interactive: cfg.useInteractive(),
+		cfg:         cfg,
+		width:       width,
+	}
+	rb.SetPrompt(prompt)
+	return rb
+}
+
+// SetConfig swaps the configuration and re-evaluates interactive mode.
+func (r *RuneBuffer) SetConfig(cfg *Config) {
+	r.Lock()
+	r.cfg = cfg
+	r.interactive = cfg.useInteractive()
+	r.Unlock()
+}
+
+// SetMask sets the rune displayed in place of input when masking is enabled.
+func (r *RuneBuffer) SetMask(m rune) {
+	r.Lock()
+	r.cfg.MaskRune = m
+	r.Unlock()
+}
+
+// CurrentWidth returns the display width of the first x runes of the buffer.
+func (r *RuneBuffer) CurrentWidth(x int) int {
+	r.Lock()
+	defer r.Unlock()
+	return runes.WidthAll(r.buf[:x])
+}
+
+// PromptLen returns the display width of the prompt with color escape
+// sequences removed.
+func (r *RuneBuffer) PromptLen() int {
+	r.Lock()
+	width := r.promptLen()
+	r.Unlock()
+	return width
+}
+
+// promptLen is PromptLen without locking; callers must hold the mutex.
+func (r *RuneBuffer) promptLen() int {
+	return runes.WidthAll(runes.ColorFilter(r.prompt))
+}
+
+// RuneSlice returns a copy of the i runes after the cursor when i > 0, or of
+// the -i runes before the cursor otherwise.
+func (r *RuneBuffer) RuneSlice(i int) []rune {
+	r.Lock()
+	defer r.Unlock()
+
+	if i > 0 {
+		rs := make([]rune, i)
+		copy(rs, r.buf[r.idx:r.idx+i])
+		return rs
+	}
+	rs := make([]rune, -i)
+	copy(rs, r.buf[r.idx+i:r.idx])
+	return rs
+}
+
+// Runes returns a copy of the whole buffer.
+func (r *RuneBuffer) Runes() []rune {
+	r.Lock()
+	newr := make([]rune, len(r.buf))
+	copy(newr, r.buf)
+	r.Unlock()
+	return newr
+}
+
+// Pos returns the cursor index.
+func (r *RuneBuffer) Pos() int {
+	r.Lock()
+	defer r.Unlock()
+	return r.idx
+}
+
+// Len returns the number of runes in the buffer.
+func (r *RuneBuffer) Len() int {
+	r.Lock()
+	defer r.Unlock()
+	return len(r.buf)
+}
+
+// MoveToLineStart moves the cursor to index 0.
+func (r *RuneBuffer) MoveToLineStart() {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+		r.idx = 0
+	})
+}
+
+// MoveBackward moves the cursor one rune to the left, if possible.
+func (r *RuneBuffer) MoveBackward() {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+		r.idx--
+	})
+}
+
+// WriteString inserts s at the cursor.
+func (r *RuneBuffer) WriteString(s string) {
+	r.WriteRunes([]rune(s))
+}
+
+func (r *RuneBuffer)
WriteRune(s rune) {
+	r.WriteRunes([]rune{s})
+}
+
+// WriteRunes inserts s at the cursor, advances the cursor past the inserted
+// runes and redraws the line.
+func (r *RuneBuffer) WriteRunes(s []rune) {
+	r.Refresh(func() {
+		// Build the result in a fresh slice. The previous form appended the
+		// buffer tail onto s, which writes into the caller's backing array
+		// whenever s has spare capacity.
+		merged := make([]rune, 0, len(r.buf)+len(s))
+		merged = append(merged, r.buf[:r.idx]...)
+		merged = append(merged, s...)
+		merged = append(merged, r.buf[r.idx:]...)
+		r.buf = merged
+		r.idx += len(s)
+	})
+}
+
+// MoveForward moves the cursor one rune to the right, if possible.
+func (r *RuneBuffer) MoveForward() {
+	r.Refresh(func() {
+		if r.idx == len(r.buf) {
+			return
+		}
+		r.idx++
+	})
+}
+
+// IsCursorInEnd reports whether the cursor sits after the last rune.
+func (r *RuneBuffer) IsCursorInEnd() bool {
+	r.Lock()
+	defer r.Unlock()
+	return r.idx == len(r.buf)
+}
+
+// Replace overwrites the rune under the cursor with ch. The cursor must be
+// on an existing rune.
+func (r *RuneBuffer) Replace(ch rune) {
+	r.Refresh(func() {
+		r.buf[r.idx] = ch
+	})
+}
+
+// Erase clears the whole buffer, saving its contents for Yank.
+func (r *RuneBuffer) Erase() {
+	r.Refresh(func() {
+		r.idx = 0
+		r.pushKill(r.buf[:])
+		r.buf = r.buf[:0]
+	})
+}
+
+// Delete removes the rune under the cursor and reports whether anything was
+// removed.
+func (r *RuneBuffer) Delete() (success bool) {
+	r.Refresh(func() {
+		if r.idx == len(r.buf) {
+			return
+		}
+		r.pushKill(r.buf[r.idx : r.idx+1])
+		r.buf = append(r.buf[:r.idx], r.buf[r.idx+1:]...)
+		success = true
+	})
+	return
+}
+
+// DeleteWord removes text from the cursor up to just before the start of the
+// next word; when no later word start exists it kills to the end of line.
+func (r *RuneBuffer) DeleteWord() {
+	if r.idx == len(r.buf) {
+		return
+	}
+	init := r.idx
+	for init < len(r.buf) && IsWordBreak(r.buf[init]) {
+		init++
+	}
+	for i := init + 1; i < len(r.buf); i++ {
+		if !IsWordBreak(r.buf[i]) && IsWordBreak(r.buf[i-1]) {
+			r.pushKill(r.buf[r.idx:i-1])
+			r.Refresh(func() {
+				r.buf = append(r.buf[:r.idx], r.buf[i-1:]...)
+			})
+			return
+		}
+	}
+	r.Kill()
+}
+
+// MoveToPrevWord moves the cursor to the start of the previous word, or to
+// the start of the line when there is none. It reports false only when the
+// cursor was already at index 0.
+func (r *RuneBuffer) MoveToPrevWord() (success bool) {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+
+		for i := r.idx - 1; i > 0; i-- {
+			if !IsWordBreak(r.buf[i]) && IsWordBreak(r.buf[i-1]) {
+				r.idx = i
+				success = true
+				return
+			}
+		}
+		r.idx = 0
+		success = true
+	})
+	return
+}
+
+// KillFront removes everything before the cursor, saving it for Yank.
+func (r *RuneBuffer) KillFront() {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+
+		length := len(r.buf) - r.idx
+		r.pushKill(r.buf[:r.idx])
+		copy(r.buf[:length], r.buf[r.idx:])
+		r.idx = 0
+		r.buf = r.buf[:length]
+	})
+}
+
+// Kill removes everything from the cursor to the end of the line, saving it
+// for Yank.
+func (r *RuneBuffer) Kill() {
+	r.Refresh(func() {
+		r.pushKill(r.buf[r.idx:])
+		r.buf = r.buf[:r.idx]
+	})
+}
+
+// Transpose swaps the rune before the cursor with the one under it and
+// advances the cursor. Buffers shorter than two runes are left unchanged,
+// except that the cursor is advanced for a one-rune buffer.
+func (r *RuneBuffer) Transpose() {
+	r.Refresh(func() {
+		if len(r.buf) == 1 {
+			r.idx++
+		}
+
+		if len(r.buf) < 2 {
+			return
+		}
+
+		if r.idx == 0 {
+			r.idx = 1
+		} else if r.idx >= len(r.buf) {
+			r.idx = len(r.buf) - 1
+		}
+		r.buf[r.idx], r.buf[r.idx-1] = r.buf[r.idx-1], r.buf[r.idx]
+		r.idx++
+	})
+}
+
+// MoveToNextWord moves the cursor to the start of the next word, or to the
+// end of the line when there is none.
+func (r *RuneBuffer) MoveToNextWord() {
+	r.Refresh(func() {
+		for i := r.idx + 1; i < len(r.buf); i++ {
+			if !IsWordBreak(r.buf[i]) && IsWordBreak(r.buf[i-1]) {
+				r.idx = i
+				return
+			}
+		}
+
+		r.idx = len(r.buf)
+	})
+}
+
+// MoveToEndWord moves the cursor to the last rune of the current or next
+// word, or to the end of the line when no further word end exists.
+func (r *RuneBuffer) MoveToEndWord() {
+	r.Refresh(func() {
+		// already at the end, so do nothing
+		if r.idx == len(r.buf) {
+			return
+		}
+		// if we are at the end of a word already, go to next.
+		// The r.idx+1 bound check prevents an out-of-range read when the
+		// cursor is on the final rune of the buffer.
+		if r.idx+1 < len(r.buf) && !IsWordBreak(r.buf[r.idx]) && IsWordBreak(r.buf[r.idx+1]) {
+			r.idx++
+		}
+
+		// keep going until at the end of a word
+		for i := r.idx + 1; i < len(r.buf); i++ {
+			if IsWordBreak(r.buf[i]) && !IsWordBreak(r.buf[i-1]) {
+				r.idx = i - 1
+				return
+			}
+		}
+		r.idx = len(r.buf)
+	})
+}
+
+// BackEscapeWord deletes backwards from the cursor to the start of the
+// preceding word.
+func (r *RuneBuffer) BackEscapeWord() {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+		for i := r.idx - 1; i > 0; i-- {
+			if !IsWordBreak(r.buf[i]) && IsWordBreak(r.buf[i-1]) {
+				r.pushKill(r.buf[i:r.idx])
+				r.buf = append(r.buf[:i], r.buf[r.idx:]...)
+				r.idx = i
+				return
+			}
+		}
+
+		// No earlier word start: remove only the text before the cursor and
+		// keep what follows it. The previous code truncated the whole
+		// buffer, discarding text after the cursor, and skipped the kill
+		// ring.
+		r.pushKill(r.buf[:r.idx])
+		r.buf = append(r.buf[:0], r.buf[r.idx:]...)
+		r.idx = 0
+	})
+}
+
+// Yank inserts the most recently killed text at the cursor.
+func (r *RuneBuffer) Yank() {
+	if len(r.lastKill) == 0 {
+		return
+	}
+	r.Refresh(func() {
+		buf := make([]rune, 0, len(r.buf) + len(r.lastKill))
+		buf = append(buf, r.buf[:r.idx]...)
+		buf = append(buf, r.lastKill...)
+		buf = append(buf, r.buf[r.idx:]...)
+		r.buf = buf
+		r.idx += len(r.lastKill)
+	})
+}
+
+// Backspace removes the rune before the cursor, if any.
+func (r *RuneBuffer) Backspace() {
+	r.Refresh(func() {
+		if r.idx == 0 {
+			return
+		}
+
+		r.idx--
+		r.buf = append(r.buf[:r.idx], r.buf[r.idx+1:]...)
+	})
+}
+
+// MoveToLineEnd moves the cursor after the last rune.
+func (r *RuneBuffer) MoveToLineEnd() {
+	r.Refresh(func() {
+		if r.idx == len(r.buf) {
+			return
+		}
+
+		r.idx = len(r.buf)
+	})
+}
+
+// LineCount returns how many terminal lines the prompt plus buffer occupy at
+// the given width; a width of -1 means the buffer's own width.
+func (r *RuneBuffer) LineCount(width int) int {
+	if width == -1 {
+		width = r.width
+	}
+	return LineCount(width,
+		runes.WidthAll(r.buf)+r.PromptLen())
+}
+
+// MoveTo moves the cursor to the nearest occurrence of ch after the cursor,
+// or before it when reverse is set. With prevChar the cursor stops one rune
+// short of the match. It reports whether a match was found.
+func (r *RuneBuffer) MoveTo(ch rune, prevChar, reverse bool) (success bool) {
+	r.Refresh(func() {
+		if reverse {
+			for i := r.idx - 1; i >= 0; i-- {
+				if r.buf[i] == ch {
+					r.idx = i
+					if prevChar {
+						r.idx++
+					}
+					success = true
+					return
+				}
+			}
+			return
+		}
+		for i := r.idx + 1; i < len(r.buf); i++ {
+			if r.buf[i] == ch {
+				r.idx = i
+				if prevChar {
+					r.idx--
+				}
+				success = true
+				return
+			}
+		}
+	})
+	return
+}
+
+// isInLineEdge reports whether the last display line produced by splitting
+// the buffer is empty; it is always false on Windows.
+func (r *RuneBuffer) isInLineEdge() bool {
+	if isWindows {
+		return false
+	}
+	sp := r.getSplitByLine(r.buf)
+	return len(sp[len(sp)-1]) == 0
+}
+
+// getSplitByLine splits rs into display lines using the prompt width and the
+// terminal width.
+func (r *RuneBuffer) getSplitByLine(rs []rune) []string {
+	return SplitByLine(r.promptLen(), r.width, rs)
+}
+
+// IdxLine returns the zero-based display line the cursor is on.
+func (r *RuneBuffer) IdxLine(width int) int {
+	r.Lock()
+	defer r.Unlock()
+	return r.idxLine(width)
+}
+
+// idxLine is IdxLine without locking; a width of 0 always yields line 0.
+func (r *RuneBuffer) idxLine(width int) int {
+	if width == 0 {
+		return 0
+	}
+	sp := r.getSplitByLine(r.buf[:r.idx])
+	return len(sp) - 1
+}
+
+// CursorLineCount returns the total line count minus the cursor's line index.
+func (r *RuneBuffer) CursorLineCount() int {
+	return r.LineCount(r.width) - r.IdxLine(r.width)
+}
+
+// Refresh runs f (when non-nil) under the lock. In interactive mode the
+// rendered line is erased before f and reprinted after it.
+func (r *RuneBuffer) Refresh(f func()) {
+	r.Lock()
+	defer r.Unlock()
+
+	if !r.interactive {
+		if f != nil {
+			f()
+		}
+		return
+	}
+
+	r.clean()
+	if f != nil {
+		f()
+	}
+	r.print()
+}
+
+// SetOffset stores the offset string.
+func (r *RuneBuffer) SetOffset(offset string) {
+	r.Lock()
+	r.offset = offset
+	r.Unlock()
+}
+
+// print writes the rendered line and clears the "already cleaned" flag so the
+// next clean erases it again.
+func (r *RuneBuffer) print() {
+	r.w.Write(r.output())
+	r.hadClean = false
+}
+
+// output renders the prompt followed by the buffer, then the escape sequence
+// that moves the terminal cursor back to idx. With masking enabled every rune
+// is shown as cfg.MaskRune, except that a trailing newline is kept. Otherwise
+// runes come from cfg.Painter and tabs are expanded to TabWidth spaces.
+func (r *RuneBuffer) output() []byte {
+	buf := bytes.NewBuffer(nil)
+	buf.WriteString(string(r.prompt))
+	if r.cfg.EnableMask && len(r.buf) > 0 {
+		buf.Write([]byte(strings.Repeat(string(r.cfg.MaskRune), len(r.buf)-1)))
+		if r.buf[len(r.buf)-1] == '\n' {
+			buf.Write([]byte{'\n'})
+		} else {
+			buf.Write([]byte(string(r.cfg.MaskRune)))
+		}
+		if len(r.buf) > r.idx {
+			buf.Write(r.getBackspaceSequence())
+		}
+
+	} else {
+		for _, e := range r.cfg.Painter.Paint(r.buf, r.idx) {
+			if e == '\t' {
+				buf.WriteString(strings.Repeat(" ", TabWidth))
+			} else {
+				buf.WriteRune(e)
+			}
+		}
+		if r.isInLineEdge() {
+			buf.Write([]byte(" \b"))
+		}
+	}
+	// cursor position
+	// NOTE(review): in the mask branch above the same sequence has already
+	// been written once, so it is emitted twice there - confirm intent.
+	if len(r.buf) > r.idx {
+		buf.Write(r.getBackspaceSequence())
+	}
+	return buf.Bytes()
+}
+
+// getBackspaceSequence builds the bytes that move the terminal cursor from
+// the end of the rendered buffer back to idx. sep marks the rune counts at
+// which a display line wraps.
+func (r *RuneBuffer) getBackspaceSequence() []byte {
+	var sep = map[int]bool{}
+
+	var i int
+	for {
+		if i >= runes.WidthAll(r.buf) {
+			break
+		}
+
+		// The first line is shorter by the width of the prompt.
+		if i == 0 {
+			i -= r.promptLen()
+		}
+		i += r.width
+
+		sep[i] = true
+	}
+	var buf []byte
+	for i := len(r.buf); i > r.idx; i-- {
+		if sep[i] {
+			// up one line, go to the start of the line and move cursor right to the end (r.width)
+			buf = append(buf, "\033[A\r"+"\033["+strconv.Itoa(r.width)+"C"...)
+		} else {
+			// move input to the left of one
+			buf = append(buf, '\b')
+		}
+	}
+
+	return buf
+
+}
+
+// Reset empties the buffer and returns a copy of its previous contents.
+// It does not take the lock and does not redraw.
+func (r *RuneBuffer) Reset() []rune {
+	ret := runes.Copy(r.buf)
+	r.buf = r.buf[:0]
+	r.idx = 0
+	return ret
+}
+
+// calWidth returns the display width of the m runes after the cursor when
+// m > 0, or of the -m runes before it otherwise.
+func (r *RuneBuffer) calWidth(m int) int {
+	if m > 0 {
+		return runes.WidthAll(r.buf[r.idx : r.idx+m])
+	}
+	return runes.WidthAll(r.buf[r.idx+m : r.idx])
+}
+
+// SetStyle moves the terminal cursor to start and rewrites buf[start:end]
+// wrapped in the SGR sequence for style, followed by a reset. It panics when
+// end < start. The cursor is not moved back afterwards.
+func (r *RuneBuffer) SetStyle(start, end int, style string) {
+	if end < start {
+		panic("end < start")
+	}
+
+	// goto start
+	move := start - r.idx
+	if move > 0 {
+		r.w.Write([]byte(string(r.buf[r.idx : r.idx+move])))
+	} else {
+		r.w.Write(bytes.Repeat([]byte("\b"), r.calWidth(move)))
+	}
+	r.w.Write([]byte("\033[" + style + "m"))
+	r.w.Write([]byte(string(r.buf[start:end])))
+	r.w.Write([]byte("\033[0m"))
+	// TODO: move back
+}
+
+// SetWithIdx replaces the buffer and cursor position and redraws. buf is
+// stored as-is, not copied.
+func (r *RuneBuffer) SetWithIdx(idx int, buf []rune) {
+	r.Refresh(func() {
+		r.buf = buf
+		r.idx = idx
+	})
+}
+
+// Set replaces the buffer and places the cursor at its end.
+func (r *RuneBuffer) Set(buf []rune) {
+	r.SetWithIdx(len(buf), buf)
+}
+
+// SetPrompt replaces the prompt; it does not redraw.
+func (r *RuneBuffer) SetPrompt(prompt string) {
+	r.Lock()
+	r.prompt = []rune(prompt)
+	r.Unlock()
+}
+
+// cleanOutput writes the escape sequences that erase the rendered line(s) to
+// w. idxLine is the display line the cursor is on; that many lines above the
+// cursor are cleared as well. With a width of 0 it falls back to carriage
+// return/backspace pairs followed by "erase below".
+func (r *RuneBuffer) cleanOutput(w io.Writer, idxLine int) {
+	buf := bufio.NewWriter(w)
+
+	if r.width == 0 {
+		buf.WriteString(strings.Repeat("\r\b", len(r.buf)+r.promptLen()))
+		buf.Write([]byte("\033[J"))
+	} else {
+		buf.Write([]byte("\033[J")) // just like ^k :)
+		if idxLine == 0 {
+			buf.WriteString("\033[2K")
+			buf.WriteString("\r")
+		} else {
+			for i := 0; i < idxLine; i++ {
+				io.WriteString(buf, "\033[2K\r\033[A")
+			}
+			io.WriteString(buf, "\033[2K\r")
+		}
+	}
+	buf.Flush()
+	return
+}
+
+// Clean erases the rendered line under the lock.
+func (r *RuneBuffer) Clean() {
+	r.Lock()
+	r.clean()
+	r.Unlock()
+}
+
+// clean erases the rendered line; callers must hold the lock.
+func (r *RuneBuffer) clean() {
+	r.cleanWithIdxLine(r.idxLine(r.width))
+}
+
+// cleanWithIdxLine erases the rendered line once: it does nothing when the
+// line was already cleaned since the last print or when not interactive.
+func (r *RuneBuffer) cleanWithIdxLine(idxLine int) {
+	if r.hadClean || !r.interactive {
+		return
+	}
+	r.hadClean = true
+	r.cleanOutput(r.w, idxLine)
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/runes.go
b/src/control/vendor/github.com/desertbit/readline/runes.go
new file mode 100644
index 00000000000..a669bc48c30
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/runes.go
@@ -0,0 +1,223 @@
+package readline
+
+import (
+	"bytes"
+	"unicode"
+	"unicode/utf8"
+)
+
+// runes is the package-wide handle for the stateless Runes helpers.
+var runes = Runes{}
+
+// TabWidth is the number of columns a tab character occupies.
+var TabWidth = 4
+
+// Runes groups helper functions that operate on rune slices.
+type Runes struct{}
+
+// EqualRune reports whether a and b are equal. With fold set, an ASCII
+// upper-case letter also matches its lower-case form.
+func (Runes) EqualRune(a, b rune, fold bool) bool {
+	if a == b {
+		return true
+	}
+	if !fold {
+		return false
+	}
+	// Order the pair so that a <= b; an upper-case ASCII letter sorts before
+	// its lower-case counterpart.
+	if a > b {
+		a, b = b, a
+	}
+	if b < utf8.RuneSelf && 'A' <= a && a <= 'Z' {
+		if b == a+'a'-'A' {
+			return true
+		}
+	}
+	return false
+}
+
+// EqualRuneFold is EqualRune with ASCII case folding enabled.
+func (r Runes) EqualRuneFold(a, b rune) bool {
+	return r.EqualRune(a, b, true)
+}
+
+// EqualFold reports whether a and b have the same length and match rune by
+// rune under ASCII case folding.
+func (r Runes) EqualFold(a, b []rune) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := 0; i < len(a); i++ {
+		if r.EqualRuneFold(a[i], b[i]) {
+			continue
+		}
+		return false
+	}
+
+	return true
+}
+
+// Equal reports whether a and b contain exactly the same runes.
+func (Runes) Equal(a, b []rune) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := 0; i < len(a); i++ {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// IndexAllBckEx returns the index of the last occurrence of sub in r, or -1
+// when there is none; fold enables ASCII case folding.
+func (rs Runes) IndexAllBckEx(r, sub []rune, fold bool) int {
+	for i := len(r) - len(sub); i >= 0; i-- {
+		found := true
+		for j := 0; j < len(sub); j++ {
+			if !rs.EqualRune(r[i+j], sub[j], fold) {
+				found = false
+				break
+			}
+		}
+		if found {
+			return i
+		}
+	}
+	return -1
+}
+
+// Search in runes from end to front
+func (rs Runes) IndexAllBck(r, sub []rune) int {
+	return rs.IndexAllBckEx(r, sub, false)
+}
+
+// Search in runes from front to end
+func (rs Runes) IndexAll(r, sub []rune) int {
+	return rs.IndexAllEx(r, sub, false)
+}
+
+// IndexAllEx returns the index of the first occurrence of sub in r, or -1
+// when there is none; fold enables ASCII case folding.
+func (rs Runes) IndexAllEx(r, sub []rune, fold bool) int {
+	for i := 0; i < len(r); i++ {
+		found := true
+		// Not enough runes left for sub to fit.
+		if len(r[i:]) < len(sub) {
+			return -1
+		}
+		for j := 0; j < len(sub); j++ {
+			if !rs.EqualRune(r[i+j], sub[j], fold) {
+				found = false
+				break
+			}
+		}
+		if found {
+			return i
+		}
+	}
+	return -1
+}
+
+// Index returns the position of the first occurrence of r in rs, or -1.
+func (Runes) Index(r rune, rs []rune) int {
+	for i := 0; i <
len(rs); i++ { + if rs[i] == r { + return i + } + } + return -1 +} + +func (Runes) ColorFilter(r []rune) []rune { + newr := make([]rune, 0, len(r)) + for pos := 0; pos < len(r); pos++ { + if r[pos] == '\033' && r[pos+1] == '[' { + idx := runes.Index('m', r[pos+2:]) + if idx == -1 { + continue + } + pos += idx + 2 + continue + } + newr = append(newr, r[pos]) + } + return newr +} + +var zeroWidth = []*unicode.RangeTable{ + unicode.Mn, + unicode.Me, + unicode.Cc, + unicode.Cf, +} + +var doubleWidth = []*unicode.RangeTable{ + unicode.Han, + unicode.Hangul, + unicode.Hiragana, + unicode.Katakana, +} + +func (Runes) Width(r rune) int { + if r == '\t' { + return TabWidth + } + if unicode.IsOneOf(zeroWidth, r) { + return 0 + } + if unicode.IsOneOf(doubleWidth, r) { + return 2 + } + return 1 +} + +func (Runes) WidthAll(r []rune) (length int) { + for i := 0; i < len(r); i++ { + length += runes.Width(r[i]) + } + return +} + +func (Runes) Backspace(r []rune) []byte { + return bytes.Repeat([]byte{'\b'}, runes.WidthAll(r)) +} + +func (Runes) Copy(r []rune) []rune { + n := make([]rune, len(r)) + copy(n, r) + return n +} + +func (Runes) HasPrefixFold(r, prefix []rune) bool { + if len(r) < len(prefix) { + return false + } + return runes.EqualFold(r[:len(prefix)], prefix) +} + +func (Runes) HasPrefix(r, prefix []rune) bool { + if len(r) < len(prefix) { + return false + } + return runes.Equal(r[:len(prefix)], prefix) +} + +func (Runes) Aggregate(candicate [][]rune) (same []rune, size int) { + for i := 0; i < len(candicate[0]); i++ { + for j := 0; j < len(candicate)-1; j++ { + if i >= len(candicate[j]) || i >= len(candicate[j+1]) { + goto aggregate + } + if candicate[j][i] != candicate[j+1][i] { + goto aggregate + } + } + size = i + 1 + } +aggregate: + if size > 0 { + same = runes.Copy(candicate[0][:size]) + for i := 0; i < len(candicate); i++ { + n := runes.Copy(candicate[i]) + copy(n, n[size:]) + candicate[i] = n[:len(n)-size] + } + } + return +} + +func (Runes) TrimSpaceLeft(in 
[]rune) []rune {
+	// Defaults to len(in) so that an all-space input yields an empty slice.
+	firstIndex := len(in)
+	for i, r := range in {
+		if unicode.IsSpace(r) == false {
+			firstIndex = i
+			break
+		}
+	}
+	return in[firstIndex:]
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/search.go b/src/control/vendor/github.com/desertbit/readline/search.go
new file mode 100644
index 00000000000..52e8ff09953
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/search.go
@@ -0,0 +1,164 @@
+package readline
+
+import (
+	"bytes"
+	"container/list"
+	"fmt"
+	"io"
+)
+
+// Search result states.
+const (
+	S_STATE_FOUND = iota
+	S_STATE_FAILING
+)
+
+// Search directions.
+const (
+	S_DIR_BCK = iota
+	S_DIR_FWD
+)
+
+// opSearch holds the state of an incremental history search: the keyword
+// typed so far (data), the direction, the history element that was current
+// when the search began (source), and the highlighted match range.
+type opSearch struct {
+	inMode    bool
+	state     int
+	dir       int
+	source    *list.Element
+	w         io.Writer
+	buf       *RuneBuffer
+	data      []rune
+	history   *opHistory
+	cfg       *Config
+	markStart int
+	markEnd   int
+	width     int
+}
+
+// newOpSearch creates the search state bound to the given writer, line
+// buffer, history and terminal width.
+func newOpSearch(w io.Writer, buf *RuneBuffer, history *opHistory, cfg *Config, width int) *opSearch {
+	return &opSearch{
+		w:       w,
+		buf:     buf,
+		cfg:     cfg,
+		history: history,
+		width:   width,
+	}
+}
+
+// OnWidthChange records the new terminal width.
+func (o *opSearch) OnWidthChange(newWidth int) {
+	o.width = newWidth
+}
+
+// IsSearchMode reports whether an incremental search is active.
+func (o *opSearch) IsSearchMode() bool {
+	return o.inMode
+}
+
+// SearchBackspace drops the last keyword rune and searches again; it does
+// nothing when the keyword is empty.
+func (o *opSearch) SearchBackspace() {
+	if len(o.data) > 0 {
+		o.data = o.data[:len(o.data)-1]
+		o.search(true)
+	}
+}
+
+// findHistoryBy looks the keyword up in the history in the current direction
+// and returns the match index within the item and the history element.
+func (o *opSearch) findHistoryBy(isNewSearch bool) (int, *list.Element) {
+	if o.dir == S_DIR_BCK {
+		return o.history.FindBck(isNewSearch, o.data, o.buf.idx)
+	}
+	return o.history.FindFwd(isNewSearch, o.data, o.buf.idx)
+}
+
+// search runs the lookup for the current keyword, loads the matching history
+// item into the line buffer and redraws the search prompt. It reports
+// whether a match was found; an empty keyword counts as found.
+func (o *opSearch) search(isChange bool) bool {
+	if len(o.data) == 0 {
+		o.state = S_STATE_FOUND
+		o.SearchRefresh(-1)
+		return true
+	}
+	idx, elem := o.findHistoryBy(isChange)
+	if elem == nil {
+		o.SearchRefresh(-2)
+		return false
+	}
+	o.history.current = elem
+
+	item := o.history.showItem(o.history.current.Value)
+	start, end := 0, 0
+	if o.dir == S_DIR_BCK {
+		start, end = idx, idx+len(o.data)
+	} else {
+		start, end = idx, idx+len(o.data)
+		// Forward search leaves the cursor after the match.
+		idx +=
len(o.data)
+	}
+	o.buf.SetWithIdx(idx, item)
+	o.markStart, o.markEnd = start, end
+	o.SearchRefresh(idx)
+	return true
+}
+
+// SearchChar appends r to the keyword and searches again.
+func (o *opSearch) SearchChar(r rune) {
+	o.data = append(o.data, r)
+	o.search(true)
+}
+
+// SearchMode enters search mode in direction dir, or repeats the search when
+// already in it. It returns false, changing nothing, when the width is 0.
+func (o *opSearch) SearchMode(dir int) bool {
+	if o.width == 0 {
+		return false
+	}
+	alreadyInMode := o.inMode
+	o.inMode = true
+	o.dir = dir
+	o.source = o.history.current
+	if alreadyInMode {
+		o.search(false)
+	} else {
+		o.SearchRefresh(-1)
+	}
+	return true
+}
+
+// ExitSearchMode leaves search mode and clears the search state. With revert
+// set, the history position and line buffer recorded on entry are restored.
+func (o *opSearch) ExitSearchMode(revert bool) {
+	if revert {
+		o.history.current = o.source
+		o.buf.Set(o.history.showItem(o.history.current.Value))
+	}
+	o.markStart, o.markEnd = 0, 0
+	o.state = S_STATE_FOUND
+	o.inMode = false
+	o.source = nil
+	o.data = nil
+}
+
+// SearchRefresh redraws the "bck-i-search:"/"fwd-i-search:" line below the
+// input and puts the terminal cursor back on the input line. x is the cursor
+// index to return to: -2 marks the search as failing, any negative value
+// means "use the buffer's current cursor index".
+func (o *opSearch) SearchRefresh(x int) {
+	if x == -2 {
+		o.state = S_STATE_FAILING
+	} else if x >= 0 {
+		o.state = S_STATE_FOUND
+	}
+	if x < 0 {
+		x = o.buf.idx
+	}
+	// Convert the rune index into a column on the cursor's display line.
+	x = o.buf.CurrentWidth(x)
+	x += o.buf.PromptLen()
+	x = x % o.width
+
+	// Underline the matched range.
+	// NOTE(review): a match starting at index 0 is not highlighted because
+	// of the "> 0" test - confirm whether that is intended.
+	if o.markStart > 0 {
+		o.buf.SetStyle(o.markStart, o.markEnd, "4")
+	}
+
+	lineCnt := o.buf.CursorLineCount()
+	buf := bytes.NewBuffer(nil)
+	buf.Write(bytes.Repeat([]byte("\n"), lineCnt))
+	buf.WriteString("\033[J")
+	if o.state == S_STATE_FAILING {
+		buf.WriteString("failing ")
+	}
+	if o.dir == S_DIR_BCK {
+		buf.WriteString("bck")
+	} else if o.dir == S_DIR_FWD {
+		buf.WriteString("fwd")
+	}
+	buf.WriteString("-i-search: ")
+	buf.WriteString(string(o.data))         // keyword
+	buf.WriteString("\033[4m \033[0m")      // _
+	fmt.Fprintf(buf, "\r\033[%dA", lineCnt) // move prev
+	if x > 0 {
+		fmt.Fprintf(buf, "\033[%dC", x) // move forward
+	}
+	o.w.Write(buf.Bytes())
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/std.go b/src/control/vendor/github.com/desertbit/readline/std.go
new file mode 100644
index 00000000000..61d44b75974
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/std.go
@@ -0,0 +1,197 @@
+package readline
+
+import (
+	"io"
+	"os"
+	"sync"
+	"sync/atomic"
+)
+
+// Stdin, Stdout and Stderr are the streams used by default; they start out
+// as the process's standard streams.
+var (
+	Stdin  io.ReadCloser  = os.Stdin
+	Stdout io.WriteCloser = os.Stdout
+	Stderr io.WriteCloser = os.Stderr
+)
+
+// std is the lazily created global instance; stdOnce guards its creation.
+var (
+	std     *Instance
+	stdOnce sync.Once
+)
+
+// getInstance returns the global instance, creating it on first use. The
+// global instance does not save history automatically. An error from NewEx
+// is discarded, so std may be nil if creation failed.
+func getInstance() *Instance {
+	stdOnce.Do(func() {
+		std, _ = NewEx(&Config{
+			DisableAutoSaveHistory: true,
+		})
+	})
+	return std
+}
+
+// SetHistoryPath makes the global instance load history from fp and try to
+// persist history to it. Pass "" to stop persisting history to disk, in
+// which case AddHistory never returns an error.
+func SetHistoryPath(fp string) {
+	ins := getInstance()
+	cfg := ins.Config.Clone()
+	cfg.HistoryFile = fp
+	ins.SetConfig(cfg)
+}
+
+// SetAutoComplete sets the auto completer of the global instance.
+func SetAutoComplete(completer AutoCompleter) {
+	ins := getInstance()
+	cfg := ins.Config.Clone()
+	cfg.AutoComplete = completer
+	ins.SetConfig(cfg)
+}
+
+// AddHistory adds content to the global instance's history manually. It
+// returns an error only if SetHistoryPath was given a non-empty path.
+func AddHistory(content string) error {
+	ins := getInstance()
+	return ins.SaveHistory(content)
+}
+
+// Password prompts for a password using the global instance.
+func Password(prompt string) ([]byte, error) {
+	ins := getInstance()
+	return ins.ReadPassword(prompt)
+}
+
+// Line reads one line with the given prompt using the global instance.
+func Line(prompt string) (string, error) {
+	ins := getInstance()
+	ins.SetPrompt(prompt)
+	return ins.Readline()
+}
+
+// CancelableStdin wraps a reader so that a pending Read can be interrupted
+// by Close. The actual read runs in a background goroutine (ioloop); notify
+// hands requests and results back and forth, and stop is closed on Close.
+type CancelableStdin struct {
+	r      io.Reader
+	mutex  sync.Mutex
+	stop   chan struct{}
+	closed int32
+	notify chan struct{}
+	data   []byte
+	read   int
+	err    error
+}
+
+// NewCancelableStdin wraps r and starts the background read loop.
+func NewCancelableStdin(r io.Reader) *CancelableStdin {
+	c := &CancelableStdin{
+		r:      r,
+		notify: make(chan struct{}),
+		stop:   make(chan struct{}),
+	}
+	go c.ioloop()
+	return c
+}
+
+// ioloop waits for a read request on notify, performs the read into c.data,
+// and signals completion on notify. It exits once stop is closed.
+func (c *CancelableStdin) ioloop() {
+loop:
+	for {
+		select {
+		case <-c.notify:
+			c.read, c.err = c.r.Read(c.data)
+			select {
+			case c.notify <- struct{}{}:
+			case <-c.stop:
+				break loop
+			}
+		case <-c.stop:
+			break loop
+		}
+	}
+}
+
+func (c
*CancelableStdin) Read(b []byte) (n int, err error) {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+	if atomic.LoadInt32(&c.closed) == 1 {
+		return 0, io.EOF
+	}
+
+	// Hand b to the read loop, then wait for its result. Either step is
+	// abandoned with io.EOF as soon as stop is closed.
+	c.data = b
+	select {
+	case c.notify <- struct{}{}:
+	case <-c.stop:
+		return 0, io.EOF
+	}
+	select {
+	case <-c.notify:
+		return c.read, c.err
+	case <-c.stop:
+		return 0, io.EOF
+	}
+}
+
+// Close marks the reader closed and closes stop exactly once, which unblocks
+// any pending Read and ends the read loop. It always returns nil.
+func (c *CancelableStdin) Close() error {
+	if atomic.CompareAndSwapInt32(&c.closed, 0, 1) {
+		close(c.stop)
+	}
+	return nil
+}
+
+// FillableStdin is a stdin reader which can prepend some data before
+// reading into the real stdin
+type FillableStdin struct {
+	sync.Mutex
+	stdin       io.Reader
+	stdinBuffer io.ReadCloser
+	buf         []byte
+	bufErr      error
+}
+
+// NewFillableStdin returns a FillableStdin reading from stdin, together with
+// the writer used to inject data ahead of the real input.
+func NewFillableStdin(stdin io.Reader) (io.ReadCloser, io.Writer) {
+	r, w := io.Pipe()
+	s := &FillableStdin{
+		stdinBuffer: r,
+		stdin:       stdin,
+	}
+	s.ioloop()
+	return s, w
+}
+
+// ioloop starts a goroutine that drains the injection pipe into s.buf in
+// chunks of up to 100 bytes. It stops only on io.ErrClosedPipe; other read
+// errors are stored in s.bufErr and the loop continues.
+func (s *FillableStdin) ioloop() {
+	go func() {
+		for {
+			bufR := make([]byte, 100)
+			var n int
+			n, s.bufErr = s.stdinBuffer.Read(bufR)
+			if s.bufErr != nil {
+				if s.bufErr == io.ErrClosedPipe {
+					break
+				}
+			}
+			s.Lock()
+			s.buf = append(s.buf, bufR[:n]...)
+ s.Unlock() + } + }() +} + +// Read will read from the local buffer and if no data, read from stdin +func (s *FillableStdin) Read(p []byte) (n int, err error) { + s.Lock() + i := len(s.buf) + if len(p) < i { + i = len(p) + } + if i > 0 { + n := copy(p, s.buf) + s.buf = s.buf[:0] + cerr := s.bufErr + s.bufErr = nil + s.Unlock() + return n, cerr + } + s.Unlock() + n, err = s.stdin.Read(p) + return n, err +} + +func (s *FillableStdin) Close() error { + s.stdinBuffer.Close() + return nil +} diff --git a/src/control/vendor/github.com/desertbit/readline/std_windows.go b/src/control/vendor/github.com/desertbit/readline/std_windows.go new file mode 100644 index 00000000000..b10f91bcb7e --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/std_windows.go @@ -0,0 +1,9 @@ +// +build windows + +package readline + +func init() { + Stdin = NewRawReader() + Stdout = NewANSIWriter(Stdout) + Stderr = NewANSIWriter(Stderr) +} diff --git a/src/control/vendor/github.com/desertbit/readline/term.go b/src/control/vendor/github.com/desertbit/readline/term.go new file mode 100644 index 00000000000..133993ca8ea --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/term.go @@ -0,0 +1,123 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin dragonfly freebsd linux,!appengine netbsd openbsd solaris + +// Package terminal provides support functions for dealing with terminals, as +// commonly found on UNIX systems. +// +// Putting a terminal into raw mode is the most common requirement: +// +// oldState, err := terminal.MakeRaw(0) +// if err != nil { +// panic(err) +// } +// defer terminal.Restore(0, oldState) +package readline + +import ( + "io" + "syscall" +) + +// State contains the state of a terminal. +type State struct { + termios Termios +} + +// IsTerminal returns true if the given file descriptor is a terminal. 
+func IsTerminal(fd int) bool {
+	// A descriptor counts as a terminal when its termios can be read.
+	if _, err := getTermios(fd); err != nil {
+		return false
+	}
+	return true
+}
+
+// MakeRaw switches the terminal behind fd into raw mode. The returned State
+// holds the settings that were active beforehand so the caller can restore
+// them later; it is returned even when applying the raw settings fails.
+func MakeRaw(fd int) (*State, error) {
+	current, err := getTermios(fd)
+	if err != nil {
+		return nil, err
+	}
+	saved := &State{termios: *current}
+
+	// Work on a private copy so that saved keeps the original settings.
+	// The flag changes follow cfmakeraw as described in termios(3), except
+	// that Oflag is left unchanged (the OPOST clear is disabled).
+	raw := saved.termios
+	raw.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
+	// raw.Oflag &^= syscall.OPOST
+	raw.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
+	raw.Cflag &^= syscall.CSIZE | syscall.PARENB
+	raw.Cflag |= syscall.CS8
+
+	// Reads return as soon as one byte is available, with no timeout.
+	raw.Cc[syscall.VMIN] = 1
+	raw.Cc[syscall.VTIME] = 0
+
+	return saved, setTermios(fd, &raw)
+}
+
+// GetState captures the terminal's current settings, which is useful for
+// putting the terminal back in order after a signal.
+func GetState(fd int) (*State, error) {
+	current, err := getTermios(fd)
+	if err != nil {
+		return nil, err
+	}
+	snapshot := State{termios: *current}
+	return &snapshot, nil
+}
+
+// restoreTerm applies a previously captured state to the terminal behind fd.
+func restoreTerm(fd int, state *State) error {
+	saved := &state.termios
+	return setTermios(fd, saved)
+}
+
+// ReadPassword reads a line of input from a terminal without local echo. This
+// is commonly used for inputting passwords and other sensitive data. The slice
+// returned does not include the \n.
+func ReadPassword(fd int) ([]byte, error) {
+	oldState, err := getTermios(fd)
+	if err != nil {
+		return nil, err
+	}
+
+	// Copy the struct, not the pointer. getTermios returns *Termios, so a
+	// plain assignment made newState an alias of oldState and the deferred
+	// restore wrote the modified (echo-off) settings back, leaving echo
+	// disabled after the call.
+	newState := *oldState
+	newState.Lflag &^= syscall.ECHO
+	newState.Lflag |= syscall.ICANON | syscall.ISIG
+	newState.Iflag |= syscall.ICRNL
+	if err := setTermios(fd, &newState); err != nil {
+		return nil, err
+	}
+
+	// Best-effort restore of the original settings on every return path.
+	defer func() {
+		setTermios(fd, oldState)
+	}()
+
+	var buf [16]byte
+	var ret []byte
+	for {
+		n, err := syscall.Read(fd, buf[:])
+		if err != nil {
+			return nil, err
+		}
+		if n == 0 {
+			// End of input: EOF only when nothing was read at all.
+			if len(ret) == 0 {
+				return nil, io.EOF
+			}
+			break
+		}
+		// Strip the terminating newline from the chunk.
+		if buf[n-1] == '\n' {
+			n--
+		}
+		ret = append(ret, buf[:n]...)
+		// A short chunk ends the line.
+		if n < len(buf) {
+			break
+		}
+	}
+
+	return ret, nil
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/term_bsd.go b/src/control/vendor/github.com/desertbit/readline/term_bsd.go
new file mode 100644
index 00000000000..68b56ea6ba7
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/term_bsd.go
@@ -0,0 +1,29 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd netbsd openbsd
+
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// getTermios reads the termios settings of fd with the TIOCGETA ioctl.
+func getTermios(fd int) (*Termios, error) {
+	termios := new(Termios)
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	if err != 0 {
+		return nil, err
+	}
+	return termios, nil
+}
+
+// setTermios applies termios to fd with the TIOCSETA ioctl.
+func setTermios(fd int, termios *Termios) error {
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	// err is a syscall.Errno; compare against 0 so that success is reported
+	// as a nil error rather than a non-nil interface wrapping Errno(0).
+	if err != 0 {
+		return err
+	}
+	return nil
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/term_linux.go b/src/control/vendor/github.com/desertbit/readline/term_linux.go
new file mode 100644
index 00000000000..e3392b4ac2d
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/term_linux.go
@@ -0,0 +1,33 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// These constants are declared here, rather than importing
+// them from the syscall package as some syscall packages, even
+// on linux, for example gccgo, do not declare them.
+const ioctlReadTermios = 0x5401 // syscall.TCGETS +const ioctlWriteTermios = 0x5402 // syscall.TCSETS + +func getTermios(fd int) (*Termios, error) { + termios := new(Termios) + _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlReadTermios, uintptr(unsafe.Pointer(termios)), 0, 0, 0) + if err != 0 { + return nil, err + } + return termios, nil +} + +func setTermios(fd int, termios *Termios) error { + _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlWriteTermios, uintptr(unsafe.Pointer(termios)), 0, 0, 0) + if err != 0 { + return err + } + return nil +} diff --git a/src/control/vendor/github.com/desertbit/readline/term_solaris.go b/src/control/vendor/github.com/desertbit/readline/term_solaris.go new file mode 100644 index 00000000000..4c27273c7ab --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/term_solaris.go @@ -0,0 +1,32 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build solaris + +package readline + +import "golang.org/x/sys/unix" + +// GetSize returns the dimensions of the given terminal. 
+func GetSize(fd int) (int, int, error) {
+	ws, err := unix.IoctlGetWinsize(fd, unix.TIOCGWINSZ)
+	if err != nil {
+		return 0, 0, err
+	}
+	// Width (columns) first, then height (rows).
+	return int(ws.Col), int(ws.Row), nil
+}
+
+// Termios is the platform's terminal settings structure.
+type Termios unix.Termios
+
+// getTermios reads the termios settings of fd with the TCGETS ioctl.
+func getTermios(fd int) (*Termios, error) {
+	termios, err := unix.IoctlGetTermios(fd, unix.TCGETS)
+	if err != nil {
+		return nil, err
+	}
+	return (*Termios)(termios), nil
+}
+
+// setTermios applies termios to fd with the TCSETSF ioctl.
+func setTermios(fd int, termios *Termios) error {
+	return unix.IoctlSetTermios(fd, unix.TCSETSF, (*unix.Termios)(termios))
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/term_unix.go b/src/control/vendor/github.com/desertbit/readline/term_unix.go
new file mode 100644
index 00000000000..d3ea242448d
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/term_unix.go
@@ -0,0 +1,24 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux,!appengine netbsd openbsd
+
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Termios is the platform's terminal settings structure.
+type Termios syscall.Termios
+
+// GetSize returns the dimensions of the given terminal.
+func GetSize(fd int) (int, int, error) {
+	// TIOCGWINSZ fills four uint16 values; index 0 is returned as the
+	// height and index 1 as the width.
+	var dimensions [4]uint16
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(&dimensions)), 0, 0, 0)
+	if err != 0 {
+		return 0, 0, err
+	}
+	return int(dimensions[1]), int(dimensions[0]), nil
+}
diff --git a/src/control/vendor/github.com/desertbit/readline/term_windows.go b/src/control/vendor/github.com/desertbit/readline/term_windows.go
new file mode 100644
index 00000000000..1290e00bc14
--- /dev/null
+++ b/src/control/vendor/github.com/desertbit/readline/term_windows.go
@@ -0,0 +1,171 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+// Package terminal provides support functions for dealing with terminals, as
+// commonly found on UNIX systems.
+//
+// Putting a terminal into raw mode is the most common requirement:
+//
+//	oldState, err := terminal.MakeRaw(0)
+//	if err != nil {
+//		panic(err)
+//	}
+//	defer terminal.Restore(0, oldState)
+package readline
+
+import (
+	"io"
+	"syscall"
+	"unsafe"
+)
+
+// Console mode bit values passed to GetConsoleMode/SetConsoleMode. The first
+// nine names are input-mode bits, the last two output-mode bits.
+const (
+	enableLineInput       = 2
+	enableEchoInput       = 4
+	enableProcessedInput  = 1
+	enableWindowInput     = 8
+	enableMouseInput      = 16
+	enableInsertMode      = 32
+	enableQuickEditMode   = 64
+	enableExtendedFlags   = 128
+	enableAutoPosition    = 256
+	enableProcessedOutput = 1
+	enableWrapAtEolOutput = 2
+)
+
+// kernel32 is loaded lazily, on first use of one of the procedures below.
+var kernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+var (
+	procGetConsoleMode             = kernel32.NewProc("GetConsoleMode")
+	procSetConsoleMode             = kernel32.NewProc("SetConsoleMode")
+	procGetConsoleScreenBufferInfo = kernel32.NewProc("GetConsoleScreenBufferInfo")
+)
+
+// Structures filled in by GetConsoleScreenBufferInfo.
+type (
+	coord struct {
+		x short
+		y short
+	}
+	smallRect struct {
+		left   short
+		top    short
+		right  short
+		bottom short
+	}
+	consoleScreenBufferInfo struct {
+		size              coord
+		cursorPosition    coord
+		attributes        word
+		window            smallRect
+		maximumWindowSize coord
+	}
+)
+
+// State holds a saved console mode.
+type State struct {
+	mode uint32
+}
+
+// IsTerminal returns true if the given file descriptor is a terminal.
+func IsTerminal(fd int) bool {
+	var st uint32
+	r, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
+	return r != 0 && e == 0
+}
+
+// MakeRaw put the terminal connected to the given file descriptor into raw
+// mode and returns the previous state of the terminal so that it can be
+// restored.
+func MakeRaw(fd int) (*State, error) { + var st uint32 + _, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0) + if e != 0 { + return nil, error(e) + } + raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput) + _, _, e = syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(raw), 0) + if e != 0 { + return nil, error(e) + } + return &State{st}, nil +} + +// GetState returns the current state of a terminal which may be useful to +// restore the terminal after a signal. +func GetState(fd int) (*State, error) { + var st uint32 + _, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0) + if e != 0 { + return nil, error(e) + } + return &State{st}, nil +} + +// Restore restores the terminal connected to the given file descriptor to a +// previous state. +func restoreTerm(fd int, state *State) error { + _, _, err := syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(state.mode), 0) + return err +} + +// GetSize returns the dimensions of the given terminal. +func GetSize(fd int) (width, height int, err error) { + var info consoleScreenBufferInfo + _, _, e := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&info)), 0) + if e != 0 { + return 0, 0, error(e) + } + return int(info.size.x), int(info.size.y), nil +} + +// ReadPassword reads a line of input from a terminal without local echo. This +// is commonly used for inputting passwords and other sensitive data. The slice +// returned does not include the \n. 
+func ReadPassword(fd int) ([]byte, error) { + var st uint32 + _, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0) + if e != 0 { + return nil, error(e) + } + old := st + + st &^= (enableEchoInput) + st |= (enableProcessedInput | enableLineInput | enableProcessedOutput) + _, _, e = syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(st), 0) + if e != 0 { + return nil, error(e) + } + + defer func() { + syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(old), 0) + }() + + var buf [16]byte + var ret []byte + for { + n, err := syscall.Read(syscall.Handle(fd), buf[:]) + if err != nil { + return nil, err + } + if n == 0 { + if len(ret) == 0 { + return nil, io.EOF + } + break + } + if buf[n-1] == '\n' { + n-- + } + if n > 0 && buf[n-1] == '\r' { + n-- + } + ret = append(ret, buf[:n]...) + if n < len(buf) { + break + } + } + + return ret, nil +} diff --git a/src/control/vendor/github.com/desertbit/readline/terminal.go b/src/control/vendor/github.com/desertbit/readline/terminal.go new file mode 100644 index 00000000000..1078631c14a --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/terminal.go @@ -0,0 +1,238 @@ +package readline + +import ( + "bufio" + "fmt" + "io" + "strings" + "sync" + "sync/atomic" +) + +type Terminal struct { + m sync.Mutex + cfg *Config + outchan chan rune + closed int32 + stopChan chan struct{} + kickChan chan struct{} + wg sync.WaitGroup + isReading int32 + sleeping int32 + + sizeChan chan string +} + +func NewTerminal(cfg *Config) (*Terminal, error) { + if err := cfg.Init(); err != nil { + return nil, err + } + t := &Terminal{ + cfg: cfg, + kickChan: make(chan struct{}, 1), + outchan: make(chan rune), + stopChan: make(chan struct{}, 1), + sizeChan: make(chan string, 1), + } + + go t.ioloop() + return t, nil +} + +// SleepToResume will sleep myself, and return only if I'm resumed. 
+func (t *Terminal) SleepToResume() { + if !atomic.CompareAndSwapInt32(&t.sleeping, 0, 1) { + return + } + defer atomic.StoreInt32(&t.sleeping, 0) + + t.ExitRawMode() + ch := WaitForResume() + SuspendMe() + <-ch + t.EnterRawMode() +} + +func (t *Terminal) EnterRawMode() (err error) { + return t.cfg.FuncMakeRaw() +} + +func (t *Terminal) ExitRawMode() (err error) { + return t.cfg.FuncExitRaw() +} + +func (t *Terminal) Write(b []byte) (int, error) { + return t.cfg.Stdout.Write(b) +} + +// WriteStdin prefill the next Stdin fetch +// Next time you call ReadLine() this value will be writen before the user input +func (t *Terminal) WriteStdin(b []byte) (int, error) { + return t.cfg.StdinWriter.Write(b) +} + +type termSize struct { + left int + top int +} + +func (t *Terminal) GetOffset(f func(offset string)) { + go func() { + f(<-t.sizeChan) + }() + t.Write([]byte("\033[6n")) +} + +func (t *Terminal) Print(s string) { + fmt.Fprintf(t.cfg.Stdout, "%s", s) +} + +func (t *Terminal) PrintRune(r rune) { + fmt.Fprintf(t.cfg.Stdout, "%c", r) +} + +func (t *Terminal) Readline() *Operation { + return NewOperation(t, t.cfg) +} + +// return rune(0) if meet EOF +func (t *Terminal) ReadRune() rune { + ch, ok := <-t.outchan + if !ok { + return rune(0) + } + return ch +} + +func (t *Terminal) IsReading() bool { + return atomic.LoadInt32(&t.isReading) == 1 +} + +func (t *Terminal) KickRead() { + select { + case t.kickChan <- struct{}{}: + default: + } +} + +func (t *Terminal) ioloop() { + t.wg.Add(1) + defer func() { + t.wg.Done() + close(t.outchan) + }() + + var ( + isEscape bool + isEscapeEx bool + expectNextChar bool + ) + + buf := bufio.NewReader(t.getStdin()) + for { + if !expectNextChar { + atomic.StoreInt32(&t.isReading, 0) + select { + case <-t.kickChan: + atomic.StoreInt32(&t.isReading, 1) + case <-t.stopChan: + return + } + } + expectNextChar = false + r, _, err := buf.ReadRune() + if err != nil { + if strings.Contains(err.Error(), "interrupted system call") { + expectNextChar 
= true + continue + } + break + } + + if isEscape { + isEscape = false + if r == CharEscapeEx { + expectNextChar = true + isEscapeEx = true + continue + } + r = escapeKey(r, buf) + } else if isEscapeEx { + isEscapeEx = false + if key := readEscKey(r, buf); key != nil { + r = escapeExKey(key) + // offset + if key.typ == 'R' { + if _, _, ok := key.Get2(); ok { + select { + case t.sizeChan <- key.attr: + default: + } + } + expectNextChar = true + continue + } + } + if r == 0 { + expectNextChar = true + continue + } + } + + expectNextChar = true + switch r { + case CharEsc: + if t.cfg.VimMode { + t.outchan <- r + break + } + isEscape = true + case CharInterrupt, CharEnter, CharCtrlJ, CharDelete: + expectNextChar = false + fallthrough + default: + t.outchan <- r + } + } + +} + +func (t *Terminal) Bell() { + fmt.Fprintf(t, "%c", CharBell) +} + +func (t *Terminal) Close() error { + if atomic.SwapInt32(&t.closed, 1) != 0 { + return nil + } + if closer, ok := t.cfg.Stdin.(io.Closer); ok { + closer.Close() + } + close(t.stopChan) + t.wg.Wait() + return t.ExitRawMode() +} + +func (t *Terminal) GetConfig() *Config { + t.m.Lock() + cfg := *t.cfg + t.m.Unlock() + return &cfg +} + +func (t *Terminal) getStdin() io.Reader { + t.m.Lock() + r := t.cfg.Stdin + t.m.Unlock() + return r +} + +func (t *Terminal) SetConfig(c *Config) error { + if err := c.Init(); err != nil { + return err + } + t.m.Lock() + t.cfg = c + t.m.Unlock() + return nil +} diff --git a/src/control/vendor/github.com/desertbit/readline/utils.go b/src/control/vendor/github.com/desertbit/readline/utils.go new file mode 100644 index 00000000000..af4e005216f --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/utils.go @@ -0,0 +1,277 @@ +package readline + +import ( + "bufio" + "bytes" + "container/list" + "fmt" + "os" + "strconv" + "strings" + "sync" + "time" + "unicode" +) + +var ( + isWindows = false +) + +const ( + CharLineStart = 1 + CharBackward = 2 + CharInterrupt = 3 + CharDelete = 4 + 
CharLineEnd = 5 + CharForward = 6 + CharBell = 7 + CharCtrlH = 8 + CharTab = 9 + CharCtrlJ = 10 + CharKill = 11 + CharCtrlL = 12 + CharEnter = 13 + CharNext = 14 + CharPrev = 16 + CharBckSearch = 18 + CharFwdSearch = 19 + CharTranspose = 20 + CharCtrlU = 21 + CharCtrlW = 23 + CharCtrlY = 25 + CharCtrlZ = 26 + CharEsc = 27 + CharEscapeEx = 91 + CharBackspace = 127 +) + +const ( + MetaBackward rune = -iota - 1 + MetaForward + MetaDelete + MetaBackspace + MetaTranspose +) + +// WaitForResume need to call before current process got suspend. +// It will run a ticker until a long duration is occurs, +// which means this process is resumed. +func WaitForResume() chan struct{} { + ch := make(chan struct{}) + var wg sync.WaitGroup + wg.Add(1) + go func() { + ticker := time.NewTicker(10 * time.Millisecond) + t := time.Now() + wg.Done() + for { + now := <-ticker.C + if now.Sub(t) > 100*time.Millisecond { + break + } + t = now + } + ticker.Stop() + ch <- struct{}{} + }() + wg.Wait() + return ch +} + +func Restore(fd int, state *State) error { + err := restoreTerm(fd, state) + if err != nil { + // errno 0 means everything is ok :) + if err.Error() == "errno 0" { + return nil + } else { + return err + } + } + return nil +} + +func IsPrintable(key rune) bool { + isInSurrogateArea := key >= 0xd800 && key <= 0xdbff + return key >= 32 && !isInSurrogateArea +} + +// translate Esc[X +func escapeExKey(key *escapeKeyPair) rune { + var r rune + switch key.typ { + case 'D': + r = CharBackward + case 'C': + r = CharForward + case 'A': + r = CharPrev + case 'B': + r = CharNext + case 'H': + r = CharLineStart + case 'F': + r = CharLineEnd + case '~': + if key.attr == "3" { + r = CharDelete + } + default: + } + return r +} + +type escapeKeyPair struct { + attr string + typ rune +} + +func (e *escapeKeyPair) Get2() (int, int, bool) { + sp := strings.Split(e.attr, ";") + if len(sp) < 2 { + return -1, -1, false + } + s1, err := strconv.Atoi(sp[0]) + if err != nil { + return -1, -1, false + } + 
s2, err := strconv.Atoi(sp[1]) + if err != nil { + return -1, -1, false + } + return s1, s2, true +} + +func readEscKey(r rune, reader *bufio.Reader) *escapeKeyPair { + p := escapeKeyPair{} + buf := bytes.NewBuffer(nil) + for { + if r == ';' { + } else if unicode.IsNumber(r) { + } else { + p.typ = r + break + } + buf.WriteRune(r) + r, _, _ = reader.ReadRune() + } + p.attr = buf.String() + return &p +} + +// translate EscX to Meta+X +func escapeKey(r rune, reader *bufio.Reader) rune { + switch r { + case 'b': + r = MetaBackward + case 'f': + r = MetaForward + case 'd': + r = MetaDelete + case CharTranspose: + r = MetaTranspose + case CharBackspace: + r = MetaBackspace + case 'O': + d, _, _ := reader.ReadRune() + switch d { + case 'H': + r = CharLineStart + case 'F': + r = CharLineEnd + default: + reader.UnreadRune() + } + case CharEsc: + + } + return r +} + +func SplitByLine(start, screenWidth int, rs []rune) []string { + var ret []string + buf := bytes.NewBuffer(nil) + currentWidth := start + for _, r := range rs { + w := runes.Width(r) + currentWidth += w + buf.WriteRune(r) + if currentWidth >= screenWidth { + ret = append(ret, buf.String()) + buf.Reset() + currentWidth = 0 + } + } + ret = append(ret, buf.String()) + return ret +} + +// calculate how many lines for N character +func LineCount(screenWidth, w int) int { + r := w / screenWidth + if w%screenWidth != 0 { + r++ + } + return r +} + +func IsWordBreak(i rune) bool { + switch { + case i >= 'a' && i <= 'z': + case i >= 'A' && i <= 'Z': + case i >= '0' && i <= '9': + default: + return true + } + return false +} + +func GetInt(s []string, def int) int { + if len(s) == 0 { + return def + } + c, err := strconv.Atoi(s[0]) + if err != nil { + return def + } + return c +} + +type RawMode struct { + state *State +} + +func (r *RawMode) Enter() (err error) { + r.state, err = MakeRaw(GetStdin()) + return err +} + +func (r *RawMode) Exit() error { + if r.state == nil { + return nil + } + return Restore(GetStdin(), 
r.state) +} + +// ----------------------------------------------------------------------------- + +func sleep(n int) { + Debug(n) + time.Sleep(2000 * time.Millisecond) +} + +// print a linked list to Debug() +func debugList(l *list.List) { + idx := 0 + for e := l.Front(); e != nil; e = e.Next() { + Debug(idx, fmt.Sprintf("%+v", e.Value)) + idx++ + } +} + +// append log info to another file +func Debug(o ...interface{}) { + f, _ := os.OpenFile("debug.tmp", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) + fmt.Fprintln(f, o...) + f.Close() +} diff --git a/src/control/vendor/github.com/desertbit/readline/utils_unix.go b/src/control/vendor/github.com/desertbit/readline/utils_unix.go new file mode 100644 index 00000000000..f88dac97bd7 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/utils_unix.go @@ -0,0 +1,83 @@ +// +build darwin dragonfly freebsd linux,!appengine netbsd openbsd solaris + +package readline + +import ( + "io" + "os" + "os/signal" + "sync" + "syscall" +) + +type winsize struct { + Row uint16 + Col uint16 + Xpixel uint16 + Ypixel uint16 +} + +// SuspendMe use to send suspend signal to myself, when we in the raw mode. 
+// For OSX it need to send to parent's pid +// For Linux it need to send to myself +func SuspendMe() { + p, _ := os.FindProcess(os.Getppid()) + p.Signal(syscall.SIGTSTP) + p, _ = os.FindProcess(os.Getpid()) + p.Signal(syscall.SIGTSTP) +} + +// get width of the terminal +func getWidth(stdoutFd int) int { + cols, _, err := GetSize(stdoutFd) + if err != nil { + return -1 + } + return cols +} + +func GetScreenWidth() int { + w := getWidth(syscall.Stdout) + if w < 0 { + w = getWidth(syscall.Stderr) + } + return w +} + +// ClearScreen clears the console screen +func ClearScreen(w io.Writer) (int, error) { + return w.Write([]byte("\033[H")) +} + +func DefaultIsTerminal() bool { + return IsTerminal(syscall.Stdin) && (IsTerminal(syscall.Stdout) || IsTerminal(syscall.Stderr)) +} + +func GetStdin() int { + return syscall.Stdin +} + +// ----------------------------------------------------------------------------- + +var ( + widthChange sync.Once + widthChangeCallback func() +) + +func DefaultOnWidthChanged(f func()) { + widthChangeCallback = f + widthChange.Do(func() { + ch := make(chan os.Signal, 1) + signal.Notify(ch, syscall.SIGWINCH) + + go func() { + for { + _, ok := <-ch + if !ok { + break + } + widthChangeCallback() + } + }() + }) +} diff --git a/src/control/vendor/github.com/desertbit/readline/utils_windows.go b/src/control/vendor/github.com/desertbit/readline/utils_windows.go new file mode 100644 index 00000000000..5bfa55dcce8 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/utils_windows.go @@ -0,0 +1,41 @@ +// +build windows + +package readline + +import ( + "io" + "syscall" +) + +func SuspendMe() { +} + +func GetStdin() int { + return int(syscall.Stdin) +} + +func init() { + isWindows = true +} + +// get width of the terminal +func GetScreenWidth() int { + info, _ := GetConsoleScreenBufferInfo() + if info == nil { + return -1 + } + return int(info.dwSize.x) +} + +// ClearScreen clears the console screen +func ClearScreen(_ io.Writer) error { + 
return SetConsoleCursorPosition(&_COORD{0, 0}) +} + +func DefaultIsTerminal() bool { + return true +} + +func DefaultOnWidthChanged(func()) { + +} diff --git a/src/control/vendor/github.com/desertbit/readline/vim.go b/src/control/vendor/github.com/desertbit/readline/vim.go new file mode 100644 index 00000000000..bedf2c1a693 --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/vim.go @@ -0,0 +1,176 @@ +package readline + +const ( + VIM_NORMAL = iota + VIM_INSERT + VIM_VISUAL +) + +type opVim struct { + cfg *Config + op *Operation + vimMode int +} + +func newVimMode(op *Operation) *opVim { + ov := &opVim{ + cfg: op.cfg, + op: op, + } + ov.SetVimMode(ov.cfg.VimMode) + return ov +} + +func (o *opVim) SetVimMode(on bool) { + if o.cfg.VimMode && !on { // turn off + o.ExitVimMode() + } + o.cfg.VimMode = on + o.vimMode = VIM_INSERT +} + +func (o *opVim) ExitVimMode() { + o.vimMode = VIM_INSERT +} + +func (o *opVim) IsEnableVimMode() bool { + return o.cfg.VimMode +} + +func (o *opVim) handleVimNormalMovement(r rune, readNext func() rune) (t rune, handled bool) { + rb := o.op.buf + handled = true + switch r { + case 'h': + t = CharBackward + case 'j': + t = CharNext + case 'k': + t = CharPrev + case 'l': + t = CharForward + case '0', '^': + rb.MoveToLineStart() + case '$': + rb.MoveToLineEnd() + case 'x': + rb.Delete() + if rb.IsCursorInEnd() { + rb.MoveBackward() + } + case 'r': + rb.Replace(readNext()) + case 'd': + next := readNext() + switch next { + case 'd': + rb.Erase() + case 'w': + rb.DeleteWord() + case 'h': + rb.Backspace() + case 'l': + rb.Delete() + } + case 'p': + rb.Yank() + case 'b', 'B': + rb.MoveToPrevWord() + case 'w', 'W': + rb.MoveToNextWord() + case 'e', 'E': + rb.MoveToEndWord() + case 'f', 'F', 't', 'T': + next := readNext() + prevChar := r == 't' || r == 'T' + reverse := r == 'F' || r == 'T' + switch next { + case CharEsc: + default: + rb.MoveTo(next, prevChar, reverse) + } + default: + return r, false + } + return t, true +} + +func 
(o *opVim) handleVimNormalEnterInsert(r rune, readNext func() rune) (t rune, handled bool) { + rb := o.op.buf + handled = true + switch r { + case 'i': + case 'I': + rb.MoveToLineStart() + case 'a': + rb.MoveForward() + case 'A': + rb.MoveToLineEnd() + case 's': + rb.Delete() + case 'S': + rb.Erase() + case 'c': + next := readNext() + switch next { + case 'c': + rb.Erase() + case 'w': + rb.DeleteWord() + case 'h': + rb.Backspace() + case 'l': + rb.Delete() + } + default: + return r, false + } + + o.EnterVimInsertMode() + return +} + +func (o *opVim) HandleVimNormal(r rune, readNext func() rune) (t rune) { + switch r { + case CharEnter, CharInterrupt: + o.ExitVimMode() + return r + } + + if r, handled := o.handleVimNormalMovement(r, readNext); handled { + return r + } + + if r, handled := o.handleVimNormalEnterInsert(r, readNext); handled { + return r + } + + // invalid operation + o.op.t.Bell() + return 0 +} + +func (o *opVim) EnterVimInsertMode() { + o.vimMode = VIM_INSERT +} + +func (o *opVim) ExitVimInsertMode() { + o.vimMode = VIM_NORMAL +} + +func (o *opVim) HandleVim(r rune, readNext func() rune) rune { + if o.vimMode == VIM_NORMAL { + return o.HandleVimNormal(r, readNext) + } + if r == CharEsc { + o.ExitVimInsertMode() + return 0 + } + + switch o.vimMode { + case VIM_INSERT: + return r + case VIM_VISUAL: + } + return r +} diff --git a/src/control/vendor/github.com/desertbit/readline/windows_api.go b/src/control/vendor/github.com/desertbit/readline/windows_api.go new file mode 100644 index 00000000000..63f4f7b78fc --- /dev/null +++ b/src/control/vendor/github.com/desertbit/readline/windows_api.go @@ -0,0 +1,152 @@ +// +build windows + +package readline + +import ( + "reflect" + "syscall" + "unsafe" +) + +var ( + kernel = NewKernel() + stdout = uintptr(syscall.Stdout) + stdin = uintptr(syscall.Stdin) +) + +type Kernel struct { + SetConsoleCursorPosition, + SetConsoleTextAttribute, + FillConsoleOutputCharacterW, + FillConsoleOutputAttribute, + 
ReadConsoleInputW, + GetConsoleScreenBufferInfo, + GetConsoleCursorInfo, + GetStdHandle CallFunc +} + +type short int16 +type word uint16 +type dword uint32 +type wchar uint16 + +type _COORD struct { + x short + y short +} + +func (c *_COORD) ptr() uintptr { + return uintptr(*(*int32)(unsafe.Pointer(c))) +} + +const ( + EVENT_KEY = 0x0001 + EVENT_MOUSE = 0x0002 + EVENT_WINDOW_BUFFER_SIZE = 0x0004 + EVENT_MENU = 0x0008 + EVENT_FOCUS = 0x0010 +) + +type _KEY_EVENT_RECORD struct { + bKeyDown int32 + wRepeatCount word + wVirtualKeyCode word + wVirtualScanCode word + unicodeChar wchar + dwControlKeyState dword +} + +// KEY_EVENT_RECORD KeyEvent; +// MOUSE_EVENT_RECORD MouseEvent; +// WINDOW_BUFFER_SIZE_RECORD WindowBufferSizeEvent; +// MENU_EVENT_RECORD MenuEvent; +// FOCUS_EVENT_RECORD FocusEvent; +type _INPUT_RECORD struct { + EventType word + Padding uint16 + Event [16]byte +} + +type _CONSOLE_SCREEN_BUFFER_INFO struct { + dwSize _COORD + dwCursorPosition _COORD + wAttributes word + srWindow _SMALL_RECT + dwMaximumWindowSize _COORD +} + +type _SMALL_RECT struct { + left short + top short + right short + bottom short +} + +type _CONSOLE_CURSOR_INFO struct { + dwSize dword + bVisible bool +} + +type CallFunc func(u ...uintptr) error + +func NewKernel() *Kernel { + k := &Kernel{} + kernel32 := syscall.NewLazyDLL("kernel32.dll") + v := reflect.ValueOf(k).Elem() + t := v.Type() + for i := 0; i < t.NumField(); i++ { + name := t.Field(i).Name + f := kernel32.NewProc(name) + v.Field(i).Set(reflect.ValueOf(k.Wrap(f))) + } + return k +} + +func (k *Kernel) Wrap(p *syscall.LazyProc) CallFunc { + return func(args ...uintptr) error { + var r0 uintptr + var e1 syscall.Errno + size := uintptr(len(args)) + if len(args) <= 3 { + buf := make([]uintptr, 3) + copy(buf, args) + r0, _, e1 = syscall.Syscall(p.Addr(), size, + buf[0], buf[1], buf[2]) + } else { + buf := make([]uintptr, 6) + copy(buf, args) + r0, _, e1 = syscall.Syscall6(p.Addr(), size, + buf[0], buf[1], buf[2], buf[3], 
buf[4], buf[5], + ) + } + + if int(r0) == 0 { + if e1 != 0 { + return error(e1) + } else { + return syscall.EINVAL + } + } + return nil + } + +} + +func GetConsoleScreenBufferInfo() (*_CONSOLE_SCREEN_BUFFER_INFO, error) { + t := new(_CONSOLE_SCREEN_BUFFER_INFO) + err := kernel.GetConsoleScreenBufferInfo( + stdout, + uintptr(unsafe.Pointer(t)), + ) + return t, err +} + +func GetConsoleCursorInfo() (*_CONSOLE_CURSOR_INFO, error) { + t := new(_CONSOLE_CURSOR_INFO) + err := kernel.GetConsoleCursorInfo(stdout, uintptr(unsafe.Pointer(t))) + return t, err +} + +func SetConsoleCursorPosition(c *_COORD) error { + return kernel.SetConsoleCursorPosition(stdout, c.ptr()) +} diff --git a/src/control/vendor/github.com/hashicorp/errwrap/LICENSE b/src/control/vendor/github.com/hashicorp/errwrap/LICENSE new file mode 100644 index 00000000000..c33dcc7c928 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/errwrap/LICENSE @@ -0,0 +1,354 @@ +Mozilla Public License, version 2.0 + +1. Definitions + +1.1. “Contributor” + + means each individual or legal entity that creates, contributes to the + creation of, or owns Covered Software. + +1.2. “Contributor Version” + + means the combination of the Contributions of others (if any) used by a + Contributor and that particular Contributor’s Contribution. + +1.3. “Contribution” + + means Covered Software of a particular Contributor. + +1.4. “Covered Software” + + means Source Code Form to which the initial Contributor has attached the + notice in Exhibit A, the Executable Form of such Source Code Form, and + Modifications of such Source Code Form, in each case including portions + thereof. + +1.5. “Incompatible With Secondary Licenses” + means + + a. that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + b. that the Covered Software was made available under the terms of version + 1.1 or earlier of the License, but not also under the terms of a + Secondary License. + +1.6. 
“Executable Form” + + means any form of the work other than Source Code Form. + +1.7. “Larger Work” + + means a work that combines Covered Software with other material, in a separate + file or files, that is not Covered Software. + +1.8. “License” + + means this document. + +1.9. “Licensable” + + means having the right to grant, to the maximum extent possible, whether at the + time of the initial grant or subsequently, any and all of the rights conveyed by + this License. + +1.10. “Modifications” + + means any of the following: + + a. any file in Source Code Form that results from an addition to, deletion + from, or modification of the contents of Covered Software; or + + b. any new file in Source Code Form that contains any Covered Software. + +1.11. “Patent Claims” of a Contributor + + means any patent claim(s), including without limitation, method, process, + and apparatus claims, in any patent Licensable by such Contributor that + would be infringed, but for the grant of the License, by the making, + using, selling, offering for sale, having made, import, or transfer of + either its Contributions or its Contributor Version. + +1.12. “Secondary License” + + means either the GNU General Public License, Version 2.0, the GNU Lesser + General Public License, Version 2.1, the GNU Affero General Public + License, Version 3.0, or any later versions of those licenses. + +1.13. “Source Code Form” + + means the form of the work preferred for making modifications. + +1.14. “You” (or “Your”) + + means an individual or a legal entity exercising rights under this + License. For legal entities, “You” includes any entity that controls, is + controlled by, or is under common control with You. 
For purposes of this + definition, “control” means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent (50%) of the + outstanding shares or beneficial ownership of such entity. + + +2. License Grants and Conditions + +2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + a. under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or as + part of a Larger Work; and + + b. under Patent Claims of such Contributor to make, use, sell, offer for + sale, have made, import, and otherwise transfer either its Contributions + or its Contributor Version. + +2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution become + effective for each Contribution on the date the Contributor first distributes + such Contribution. + +2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under this + License. No additional rights or licenses will be implied from the distribution + or licensing of Covered Software under this License. Notwithstanding Section + 2.1(b) above, no patent license is granted by a Contributor: + + a. for any code that a Contributor has removed from Covered Software; or + + b. for infringements caused by: (i) Your and any other third party’s + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + c. under Patent Claims infringed by Covered Software in the absence of its + Contributions. 
+ + This License does not grant any rights in the trademarks, service marks, or + logos of any Contributor (except as may be necessary to comply with the + notice requirements in Section 3.4). + +2.4. Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this License + (see Section 10.2) or under the terms of a Secondary License (if permitted + under the terms of Section 3.3). + +2.5. Representation + + Each Contributor represents that the Contributor believes its Contributions + are its original creation(s) or it has sufficient rights to grant the + rights to its Contributions conveyed by this License. + +2.6. Fair Use + + This License is not intended to limit any rights You have under applicable + copyright doctrines of fair use, fair dealing, or other equivalents. + +2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in + Section 2.1. + + +3. Responsibilities + +3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under the + terms of this License. You must inform recipients that the Source Code Form + of the Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter or + restrict the recipients’ rights in the Source Code Form. + +3.2. Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + a. such Covered Software must also be made available in Source Code Form, + as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost + of distribution to the recipient; and + + b. 
You may distribute such Executable Form under the terms of this License, + or sublicense it under different terms, provided that the license for + the Executable Form does not attempt to limit or alter the recipients’ + rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for the + Covered Software. If the Larger Work is a combination of Covered Software + with a work governed by one or more Secondary Licenses, and the Covered + Software is not Incompatible With Secondary Licenses, this License permits + You to additionally distribute such Covered Software under the terms of + such Secondary License(s), so that the recipient of the Larger Work may, at + their option, further distribute the Covered Software under the terms of + either this License or such Secondary License(s). + +3.4. Notices + + You may not remove or alter the substance of any license notices (including + copyright notices, patent notices, disclaimers of warranty, or limitations + of liability) contained within the Source Code Form of the Covered + Software, except that You may alter any license notices to the extent + required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on behalf + of any Contributor. You must make it absolutely clear that any such + warranty, support, indemnity, or liability obligation is offered by You + alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. 
You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this License + with respect to some or all of the Covered Software due to statute, judicial + order, or regulation then You must: (a) comply with the terms of this License + to the maximum extent possible; and (b) describe the limitations and the code + they affect. Such description must be placed in a text file included with all + distributions of the Covered Software under this License. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Termination + +5.1. The rights granted under this License will terminate automatically if You + fail to comply with any of its terms. However, if You become compliant, + then the rights granted under this License from a particular Contributor + are reinstated (a) provisionally, unless and until such Contributor + explicitly and finally terminates Your grants, and (b) on an ongoing basis, + if such Contributor fails to notify You of the non-compliance by some + reasonable means prior to 60 days after You have come back into compliance. + Moreover, Your grants from a particular Contributor are reinstated on an + ongoing basis if such Contributor notifies You of the non-compliance by + some reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You become + compliant prior to 30 days after Your receipt of the notice. + +5.2. 
If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, counter-claims, + and cross-claims) alleging that a Contributor Version directly or + indirectly infringes any patent, then the rights granted to You by any and + all Contributors for the Covered Software under Section 2.1 of this License + shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user + license agreements (excluding distributors and resellers) which have been + validly granted by You or Your distributors under this License prior to + termination shall survive termination. + +6. Disclaimer of Warranty + + Covered Software is provided under this License on an “as is” basis, without + warranty of any kind, either expressed, implied, or statutory, including, + without limitation, warranties that the Covered Software is free of defects, + merchantable, fit for a particular purpose or non-infringing. The entire + risk as to the quality and performance of the Covered Software is with You. + Should any Covered Software prove defective in any respect, You (not any + Contributor) assume the cost of any necessary servicing, repair, or + correction. This disclaimer of warranty constitutes an essential part of this + License. No use of any Covered Software is authorized under this License + except under this disclaimer. + +7. 
Limitation of Liability + + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses, even if such party shall have been + informed of the possibility of such damages. This limitation of liability + shall not apply to liability for death or personal injury resulting from such + party’s negligence to the extent applicable law prohibits such limitation. + Some jurisdictions do not allow the exclusion or limitation of incidental or + consequential damages, so this exclusion and limitation may not apply to You. + +8. Litigation + + Any litigation relating to this License may be brought only in the courts of + a jurisdiction where the defendant maintains its principal place of business + and such litigation shall be governed by laws of that jurisdiction, without + reference to its conflict-of-law provisions. Nothing in this Section shall + prevent a party’s ability to bring cross-claims or counter-claims. + +9. Miscellaneous + + This License represents the complete agreement concerning the subject matter + hereof. If any provision of this License is held to be unenforceable, such + provision shall be reformed only to the extent necessary to make it + enforceable. Any law or regulation which provides that the language of a + contract shall be construed against the drafter shall not be used to construe + this License against a Contributor. + + +10. Versions of the License + +10.1. New Versions + + Mozilla Foundation is the license steward. 
Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + +10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version of + the License under which You originally received the Covered Software, or + under the terms of any subsequent version published by the license + steward. + +10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a modified + version of this License if you rename the license and remove any + references to the name of the license steward (except to note that such + modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses + If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the + notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the + terms of the Mozilla Public License, v. + 2.0. If a copy of the MPL was not + distributed with this file, You can + obtain one at + http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, then +You may include the notice in a location (such as a LICENSE file in a relevant +directory) where a recipient would be likely to look for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible + With Secondary Licenses”, as defined by + the Mozilla Public License, v. 2.0. 
+ diff --git a/src/control/vendor/github.com/hashicorp/errwrap/README.md b/src/control/vendor/github.com/hashicorp/errwrap/README.md new file mode 100644 index 00000000000..444df08f8e7 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/errwrap/README.md @@ -0,0 +1,89 @@ +# errwrap + +`errwrap` is a package for Go that formalizes the pattern of wrapping errors +and checking if an error contains another error. + +There is a common pattern in Go of taking a returned `error` value and +then wrapping it (such as with `fmt.Errorf`) before returning it. The problem +with this pattern is that you completely lose the original `error` structure. + +Arguably the _correct_ approach is that you should make a custom structure +implementing the `error` interface, and have the original error as a field +on that structure, such [as this example](http://golang.org/pkg/os/#PathError). +This is a good approach, but you have to know the entire chain of possible +rewrapping that happens, when you might just care about one. + +`errwrap` formalizes this pattern (it doesn't matter what approach you use +above) by giving a single interface for wrapping errors, checking if a specific +error is wrapped, and extracting that error. + +## Installation and Docs + +Install using `go get github.com/hashicorp/errwrap`. + +Full documentation is available at +http://godoc.org/github.com/hashicorp/errwrap + +## Usage + +#### Basic Usage + +Below is a very basic example of its usage: + +```go +// A function that always returns an error, but wraps it, like a real +// function might. +func tryOpen() error { + _, err := os.Open("/i/dont/exist") + if err != nil { + return errwrap.Wrapf("Doesn't exist: {{err}}", err) + } + + return nil +} + +func main() { + err := tryOpen() + + // We can use the Contains helpers to check if an error contains + // another error. It is safe to do this with a nil error, or with + // an error that doesn't even use the errwrap package. 
+ if errwrap.Contains(err, "does not exist") { + // Do something + } + if errwrap.ContainsType(err, new(os.PathError)) { + // Do something + } + + // Or we can use the associated `Get` functions to just extract + // a specific error. This would return nil if that specific error doesn't + // exist. + perr := errwrap.GetType(err, new(os.PathError)) +} +``` + +#### Custom Types + +If you're already making custom types that properly wrap errors, then +you can get all the functionality of `errwrap.Contains` and such by +implementing the `Wrapper` interface with just one function. Example: + +```go +type AppError struct { + Code ErrorCode + Err error +} + +func (e *AppError) WrappedErrors() []error { + return []error{e.Err} +} +``` + +Now this works: + +```go +err := &AppError{Err: fmt.Errorf("an error")} +if errwrap.ContainsType(err, fmt.Errorf("")) { + // This will work! +} +``` diff --git a/src/control/vendor/github.com/hashicorp/errwrap/errwrap.go b/src/control/vendor/github.com/hashicorp/errwrap/errwrap.go new file mode 100644 index 00000000000..44e368e5692 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/errwrap/errwrap.go @@ -0,0 +1,178 @@ +// Package errwrap implements methods to formalize error wrapping in Go. +// +// All of the top-level functions that take an `error` are built to be able +// to take any error, not just wrapped errors. This allows you to use errwrap +// without having to type-check and type-cast everywhere. +package errwrap + +import ( + "errors" + "reflect" + "strings" +) + +// WalkFunc is the callback called for Walk. +type WalkFunc func(error) + +// Wrapper is an interface that can be implemented by custom types to +// have all the Contains, Get, etc. functions in errwrap work. +// +// When Walk reaches a Wrapper, it will call the callback for every +// wrapped error in addition to the wrapper itself. Since all the top-level +// functions in errwrap use Walk, this means that all those functions work +// with your custom type.
+type Wrapper interface { + WrappedErrors() []error +} + +// Wrap defines that outer wraps inner, returning an error type that +// can be cleanly used with the other methods in this package, such as +// Contains, GetAll, etc. +// +// This function won't modify the error message at all (the outer message +// will be used). +func Wrap(outer, inner error) error { + return &wrappedError{ + Outer: outer, + Inner: inner, + } +} + +// Wrapf wraps an error with a formatting message. This is similar to using +// `fmt.Errorf` to wrap an error. If you're using `fmt.Errorf` to wrap +// errors, you should replace it with this. +// +// format is the format of the error message. The string '{{err}}' will +// be replaced with the original error message. +// +// Deprecated: Use fmt.Errorf() +func Wrapf(format string, err error) error { + outerMsg := "" + if err != nil { + outerMsg = err.Error() + } + + outer := errors.New(strings.Replace( + format, "{{err}}", outerMsg, -1)) + + return Wrap(outer, err) +} + +// Contains checks if the given error contains an error with the +// message msg. If err is not a wrapped error, this will always return +// false unless the error itself happens to match this msg. +func Contains(err error, msg string) bool { + return len(GetAll(err, msg)) > 0 +} + +// ContainsType checks if the given error contains an error with +// the same concrete type as v. If err is not a wrapped error, this will +// check the err itself. +func ContainsType(err error, v interface{}) bool { + return len(GetAllType(err, v)) > 0 +} + +// Get is the same as GetAll but returns the deepest matching error. +func Get(err error, msg string) error { + es := GetAll(err, msg) + if len(es) > 0 { + return es[len(es)-1] + } + + return nil +} + +// GetType is the same as GetAllType but returns the deepest matching error. 
+func GetType(err error, v interface{}) error { + es := GetAllType(err, v) + if len(es) > 0 { + return es[len(es)-1] + } + + return nil +} + +// GetAll gets all the errors that might be wrapped in err with the +// given message. The order of the errors is such that the outermost +// matching error (the most recent wrap) is index zero, and so on. +func GetAll(err error, msg string) []error { + var result []error + + Walk(err, func(err error) { + if err.Error() == msg { + result = append(result, err) + } + }) + + return result +} + +// GetAllType gets all the errors that are the same type as v. +// +// The order of the return value is the same as described in GetAll. +func GetAllType(err error, v interface{}) []error { + var result []error + + var search string + if v != nil { + search = reflect.TypeOf(v).String() + } + Walk(err, func(err error) { + var needle string + if err != nil { + needle = reflect.TypeOf(err).String() + } + + if needle == search { + result = append(result, err) + } + }) + + return result +} + +// Walk walks all the wrapped errors in err and calls the callback. If +// err isn't a wrapped error, this will be called once for err. If err +// is a wrapped error, the callback will be called for both the wrapper +// that implements error as well as the wrapped error itself. +func Walk(err error, cb WalkFunc) { + if err == nil { + return + } + + switch e := err.(type) { + case *wrappedError: + cb(e.Outer) + Walk(e.Inner, cb) + case Wrapper: + cb(err) + + for _, err := range e.WrappedErrors() { + Walk(err, cb) + } + case interface{ Unwrap() error }: + cb(err) + Walk(e.Unwrap(), cb) + default: + cb(err) + } +} + +// wrappedError is an implementation of error that has both the +// outer and inner errors. 
+type wrappedError struct { + Outer error + Inner error +} + +func (w *wrappedError) Error() string { + return w.Outer.Error() +} + +func (w *wrappedError) WrappedErrors() []error { + return []error{w.Outer, w.Inner} +} + +func (w *wrappedError) Unwrap() error { + return w.Inner +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/.travis.yml b/src/control/vendor/github.com/hashicorp/go-multierror/.travis.yml new file mode 100644 index 00000000000..24b80388f72 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/.travis.yml @@ -0,0 +1,12 @@ +sudo: false + +language: go + +go: + - 1.x + +branches: + only: + - master + +script: env GO111MODULE=on make test testrace diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/LICENSE b/src/control/vendor/github.com/hashicorp/go-multierror/LICENSE new file mode 100644 index 00000000000..82b4de97c7e --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/LICENSE @@ -0,0 +1,353 @@ +Mozilla Public License, version 2.0 + +1. Definitions + +1.1. “Contributor” + + means each individual or legal entity that creates, contributes to the + creation of, or owns Covered Software. + +1.2. “Contributor Version” + + means the combination of the Contributions of others (if any) used by a + Contributor and that particular Contributor’s Contribution. + +1.3. “Contribution” + + means Covered Software of a particular Contributor. + +1.4. “Covered Software” + + means Source Code Form to which the initial Contributor has attached the + notice in Exhibit A, the Executable Form of such Source Code Form, and + Modifications of such Source Code Form, in each case including portions + thereof. + +1.5. “Incompatible With Secondary Licenses” + means + + a. that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + b. 
that the Covered Software was made available under the terms of version + 1.1 or earlier of the License, but not also under the terms of a + Secondary License. + +1.6. “Executable Form” + + means any form of the work other than Source Code Form. + +1.7. “Larger Work” + + means a work that combines Covered Software with other material, in a separate + file or files, that is not Covered Software. + +1.8. “License” + + means this document. + +1.9. “Licensable” + + means having the right to grant, to the maximum extent possible, whether at the + time of the initial grant or subsequently, any and all of the rights conveyed by + this License. + +1.10. “Modifications” + + means any of the following: + + a. any file in Source Code Form that results from an addition to, deletion + from, or modification of the contents of Covered Software; or + + b. any new file in Source Code Form that contains any Covered Software. + +1.11. “Patent Claims” of a Contributor + + means any patent claim(s), including without limitation, method, process, + and apparatus claims, in any patent Licensable by such Contributor that + would be infringed, but for the grant of the License, by the making, + using, selling, offering for sale, having made, import, or transfer of + either its Contributions or its Contributor Version. + +1.12. “Secondary License” + + means either the GNU General Public License, Version 2.0, the GNU Lesser + General Public License, Version 2.1, the GNU Affero General Public + License, Version 3.0, or any later versions of those licenses. + +1.13. “Source Code Form” + + means the form of the work preferred for making modifications. + +1.14. “You” (or “Your”) + + means an individual or a legal entity exercising rights under this + License. For legal entities, “You” includes any entity that controls, is + controlled by, or is under common control with You. 
For purposes of this + definition, “control” means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent (50%) of the + outstanding shares or beneficial ownership of such entity. + + +2. License Grants and Conditions + +2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + a. under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or as + part of a Larger Work; and + + b. under Patent Claims of such Contributor to make, use, sell, offer for + sale, have made, import, and otherwise transfer either its Contributions + or its Contributor Version. + +2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution become + effective for each Contribution on the date the Contributor first distributes + such Contribution. + +2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under this + License. No additional rights or licenses will be implied from the distribution + or licensing of Covered Software under this License. Notwithstanding Section + 2.1(b) above, no patent license is granted by a Contributor: + + a. for any code that a Contributor has removed from Covered Software; or + + b. for infringements caused by: (i) Your and any other third party’s + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + c. under Patent Claims infringed by Covered Software in the absence of its + Contributions. 
+ + This License does not grant any rights in the trademarks, service marks, or + logos of any Contributor (except as may be necessary to comply with the + notice requirements in Section 3.4). + +2.4. Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this License + (see Section 10.2) or under the terms of a Secondary License (if permitted + under the terms of Section 3.3). + +2.5. Representation + + Each Contributor represents that the Contributor believes its Contributions + are its original creation(s) or it has sufficient rights to grant the + rights to its Contributions conveyed by this License. + +2.6. Fair Use + + This License is not intended to limit any rights You have under applicable + copyright doctrines of fair use, fair dealing, or other equivalents. + +2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in + Section 2.1. + + +3. Responsibilities + +3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under the + terms of this License. You must inform recipients that the Source Code Form + of the Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter or + restrict the recipients’ rights in the Source Code Form. + +3.2. Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + a. such Covered Software must also be made available in Source Code Form, + as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost + of distribution to the recipient; and + + b. 
You may distribute such Executable Form under the terms of this License, + or sublicense it under different terms, provided that the license for + the Executable Form does not attempt to limit or alter the recipients’ + rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for the + Covered Software. If the Larger Work is a combination of Covered Software + with a work governed by one or more Secondary Licenses, and the Covered + Software is not Incompatible With Secondary Licenses, this License permits + You to additionally distribute such Covered Software under the terms of + such Secondary License(s), so that the recipient of the Larger Work may, at + their option, further distribute the Covered Software under the terms of + either this License or such Secondary License(s). + +3.4. Notices + + You may not remove or alter the substance of any license notices (including + copyright notices, patent notices, disclaimers of warranty, or limitations + of liability) contained within the Source Code Form of the Covered + Software, except that You may alter any license notices to the extent + required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on behalf + of any Contributor. You must make it absolutely clear that any such + warranty, support, indemnity, or liability obligation is offered by You + alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. 
You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this License + with respect to some or all of the Covered Software due to statute, judicial + order, or regulation then You must: (a) comply with the terms of this License + to the maximum extent possible; and (b) describe the limitations and the code + they affect. Such description must be placed in a text file included with all + distributions of the Covered Software under this License. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Termination + +5.1. The rights granted under this License will terminate automatically if You + fail to comply with any of its terms. However, if You become compliant, + then the rights granted under this License from a particular Contributor + are reinstated (a) provisionally, unless and until such Contributor + explicitly and finally terminates Your grants, and (b) on an ongoing basis, + if such Contributor fails to notify You of the non-compliance by some + reasonable means prior to 60 days after You have come back into compliance. + Moreover, Your grants from a particular Contributor are reinstated on an + ongoing basis if such Contributor notifies You of the non-compliance by + some reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You become + compliant prior to 30 days after Your receipt of the notice. + +5.2. 
If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, counter-claims, + and cross-claims) alleging that a Contributor Version directly or + indirectly infringes any patent, then the rights granted to You by any and + all Contributors for the Covered Software under Section 2.1 of this License + shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user + license agreements (excluding distributors and resellers) which have been + validly granted by You or Your distributors under this License prior to + termination shall survive termination. + +6. Disclaimer of Warranty + + Covered Software is provided under this License on an “as is” basis, without + warranty of any kind, either expressed, implied, or statutory, including, + without limitation, warranties that the Covered Software is free of defects, + merchantable, fit for a particular purpose or non-infringing. The entire + risk as to the quality and performance of the Covered Software is with You. + Should any Covered Software prove defective in any respect, You (not any + Contributor) assume the cost of any necessary servicing, repair, or + correction. This disclaimer of warranty constitutes an essential part of this + License. No use of any Covered Software is authorized under this License + except under this disclaimer. + +7. 
Limitation of Liability + + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses, even if such party shall have been + informed of the possibility of such damages. This limitation of liability + shall not apply to liability for death or personal injury resulting from such + party’s negligence to the extent applicable law prohibits such limitation. + Some jurisdictions do not allow the exclusion or limitation of incidental or + consequential damages, so this exclusion and limitation may not apply to You. + +8. Litigation + + Any litigation relating to this License may be brought only in the courts of + a jurisdiction where the defendant maintains its principal place of business + and such litigation shall be governed by laws of that jurisdiction, without + reference to its conflict-of-law provisions. Nothing in this Section shall + prevent a party’s ability to bring cross-claims or counter-claims. + +9. Miscellaneous + + This License represents the complete agreement concerning the subject matter + hereof. If any provision of this License is held to be unenforceable, such + provision shall be reformed only to the extent necessary to make it + enforceable. Any law or regulation which provides that the language of a + contract shall be construed against the drafter shall not be used to construe + this License against a Contributor. + + +10. Versions of the License + +10.1. New Versions + + Mozilla Foundation is the license steward. 
Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + +10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version of + the License under which You originally received the Covered Software, or + under the terms of any subsequent version published by the license + steward. + +10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a modified + version of this License if you rename the license and remove any + references to the name of the license steward (except to note that such + modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses + If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the + notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the + terms of the Mozilla Public License, v. + 2.0. If a copy of the MPL was not + distributed with this file, You can + obtain one at + http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, then +You may include the notice in a location (such as a LICENSE file in a relevant +directory) where a recipient would be likely to look for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible + With Secondary Licenses”, as defined by + the Mozilla Public License, v. 2.0. 
diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/Makefile b/src/control/vendor/github.com/hashicorp/go-multierror/Makefile new file mode 100644 index 00000000000..b97cd6ed02b --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/Makefile @@ -0,0 +1,31 @@ +TEST?=./... + +default: test + +# test runs the test suite and vets the code. +test: generate + @echo "==> Running tests..." + @go list $(TEST) \ + | grep -v "/vendor/" \ + | xargs -n1 go test -timeout=60s -parallel=10 ${TESTARGS} + +# testrace runs the race checker +testrace: generate + @echo "==> Running tests (race)..." + @go list $(TEST) \ + | grep -v "/vendor/" \ + | xargs -n1 go test -timeout=60s -race ${TESTARGS} + +# updatedeps installs all the dependencies needed to run and build. +updatedeps: + @sh -c "'${CURDIR}/scripts/deps.sh' '${NAME}'" + +# generate runs `go generate` to build the dynamically generated source files. +generate: + @echo "==> Generating..." + @find . -type f -name '.DS_Store' -delete + @go list ./... \ + | grep -v "/vendor/" \ + | xargs -n1 go generate + +.PHONY: default test testrace updatedeps generate diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/README.md b/src/control/vendor/github.com/hashicorp/go-multierror/README.md new file mode 100644 index 00000000000..e92fa614cd6 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/README.md @@ -0,0 +1,131 @@ +# go-multierror + +[![Build Status](http://img.shields.io/travis/hashicorp/go-multierror.svg?style=flat-square)][travis] +[![Go Documentation](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)][godocs] + +[travis]: https://travis-ci.org/hashicorp/go-multierror +[godocs]: https://godoc.org/github.com/hashicorp/go-multierror + +`go-multierror` is a package for Go that provides a mechanism for +representing a list of `error` values as a single `error`. 
+ +This allows a function in Go to return an `error` that might actually +be a list of errors. If the caller knows this, they can unwrap the +list and access the errors. If the caller doesn't know, the error +formats to a nice human-readable format. + +`go-multierror` is fully compatible with the Go standard library +[errors](https://golang.org/pkg/errors/) package, including the +functions `As`, `Is`, and `Unwrap`. This provides a standardized approach +for introspecting on error values. + +## Installation and Docs + +Install using `go get github.com/hashicorp/go-multierror`. + +Full documentation is available at +http://godoc.org/github.com/hashicorp/go-multierror + +## Usage + +go-multierror is easy to use and purposely built to be unobtrusive in +existing Go applications/libraries that may not be aware of it. + +**Building a list of errors** + +The `Append` function is used to create a list of errors. This function +behaves a lot like the Go built-in `append` function: it doesn't matter +if the first argument is nil, a `multierror.Error`, or any other `error`, +the function behaves as you would expect. + +```go +var result error + +if err := step1(); err != nil { + result = multierror.Append(result, err) +} +if err := step2(); err != nil { + result = multierror.Append(result, err) +} + +return result +``` + +**Customizing the formatting of the errors** + +By specifying a custom `ErrorFormat`, you can customize the format +of the `Error() string` function: + +```go +var result *multierror.Error + +// ... accumulate errors here, maybe using Append + +if result != nil { + result.ErrorFormat = func([]error) string { + return "errors!" + } +} +``` + +**Accessing the list of errors** + +`multierror.Error` implements `error` so if the caller doesn't know about +multierror, it will work just fine. 
But if you're aware a multierror might +be returned, you can use type switches to access the list of errors: + +```go +if err := something(); err != nil { + if merr, ok := err.(*multierror.Error); ok { + // Use merr.Errors + } +} +``` + +You can also use the standard [`errors.Unwrap`](https://golang.org/pkg/errors/#Unwrap) +function. This will continue to unwrap into subsequent errors until none exist. + +**Extracting an error** + +The standard library [`errors.As`](https://golang.org/pkg/errors/#As) +function can be used directly with a multierror to extract a specific error: + +```go +// Assume err is a multierror value +err := somefunc() + +// We want to know if "err" has a "RichErrorType" in it and extract it. +var errRich RichErrorType +if errors.As(err, &errRich) { + // It has it, and now errRich is populated. +} +``` + +**Checking for an exact error value** + +Some errors are returned as exact errors such as the [`ErrNotExist`](https://golang.org/pkg/os/#pkg-variables) +error in the `os` package. You can check if this error is present by using +the standard [`errors.Is`](https://golang.org/pkg/errors/#Is) function. + +```go +// Assume err is a multierror value +err := somefunc() +if errors.Is(err, os.ErrNotExist) { + // err contains os.ErrNotExist +} +``` + +**Returning a multierror only if there are errors** + +If you build a `multierror.Error`, you can use the `ErrorOrNil` function +to return an `error` implementation only if there are errors to return: + +```go +var result *multierror.Error + +// ... accumulate errors here + +// Return the `error` only if errors were added to the multierror, otherwise +// return nil since there are no errors. 
+return result.ErrorOrNil() +``` diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/append.go b/src/control/vendor/github.com/hashicorp/go-multierror/append.go new file mode 100644 index 00000000000..775b6e753e7 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/append.go @@ -0,0 +1,41 @@ +package multierror + +// Append is a helper function that will append more errors +// onto an Error in order to create a larger multi-error. +// +// If err is not a multierror.Error, then it will be turned into +// one. If any of the errs are multierr.Error, they will be flattened +// one level into err. +func Append(err error, errs ...error) *Error { + switch err := err.(type) { + case *Error: + // Typed nils can reach here, so initialize if we are nil + if err == nil { + err = new(Error) + } + + // Go through each error and flatten + for _, e := range errs { + switch e := e.(type) { + case *Error: + if e != nil { + err.Errors = append(err.Errors, e.Errors...) + } + default: + if e != nil { + err.Errors = append(err.Errors, e) + } + } + } + + return err + default: + newErrs := make([]error, 0, len(errs)+1) + if err != nil { + newErrs = append(newErrs, err) + } + newErrs = append(newErrs, errs...) + + return Append(&Error{}, newErrs...) + } +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/flatten.go b/src/control/vendor/github.com/hashicorp/go-multierror/flatten.go new file mode 100644 index 00000000000..aab8e9abec9 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/flatten.go @@ -0,0 +1,26 @@ +package multierror + +// Flatten flattens the given error, merging any *Errors together into +// a single *Error. +func Flatten(err error) error { + // If it isn't an *Error, just return the error as-is + if _, ok := err.(*Error); !ok { + return err + } + + // Otherwise, make the result and flatten away! 
+ flatErr := new(Error) + flatten(err, flatErr) + return flatErr +} + +func flatten(err error, flatErr *Error) { + switch err := err.(type) { + case *Error: + for _, e := range err.Errors { + flatten(e, flatErr) + } + default: + flatErr.Errors = append(flatErr.Errors, err) + } +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/format.go b/src/control/vendor/github.com/hashicorp/go-multierror/format.go new file mode 100644 index 00000000000..47f13c49a67 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/format.go @@ -0,0 +1,27 @@ +package multierror + +import ( + "fmt" + "strings" +) + +// ErrorFormatFunc is a function callback that is called by Error to +// turn the list of errors into a string. +type ErrorFormatFunc func([]error) string + +// ListFormatFunc is a basic formatter that outputs the number of errors +// that occurred along with a bullet point list of the errors. +func ListFormatFunc(es []error) string { + if len(es) == 1 { + return fmt.Sprintf("1 error occurred:\n\t* %s\n\n", es[0]) + } + + points := make([]string, len(es)) + for i, err := range es { + points[i] = fmt.Sprintf("* %s", err) + } + + return fmt.Sprintf( + "%d errors occurred:\n\t%s\n\n", + len(es), strings.Join(points, "\n\t")) +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/group.go b/src/control/vendor/github.com/hashicorp/go-multierror/group.go new file mode 100644 index 00000000000..9c29efb7f87 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/group.go @@ -0,0 +1,38 @@ +package multierror + +import "sync" + +// Group is a collection of goroutines which return errors that need to be +// coalesced. +type Group struct { + mutex sync.Mutex + err *Error + wg sync.WaitGroup +} + +// Go calls the given function in a new goroutine. +// +// If the function returns an error it is added to the group multierror which +// is returned by Wait. 
+func (g *Group) Go(f func() error) { + g.wg.Add(1) + + go func() { + defer g.wg.Done() + + if err := f(); err != nil { + g.mutex.Lock() + g.err = Append(g.err, err) + g.mutex.Unlock() + } + }() +} + +// Wait blocks until all function calls from the Go method have returned, then +// returns the multierror. +func (g *Group) Wait() *Error { + g.wg.Wait() + g.mutex.Lock() + defer g.mutex.Unlock() + return g.err +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/multierror.go b/src/control/vendor/github.com/hashicorp/go-multierror/multierror.go new file mode 100644 index 00000000000..d05dd926987 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/multierror.go @@ -0,0 +1,118 @@ +package multierror + +import ( + "errors" + "fmt" +) + +// Error is an error type to track multiple errors. This is used to +// accumulate errors in cases and return them as a single "error". +type Error struct { + Errors []error + ErrorFormat ErrorFormatFunc +} + +func (e *Error) Error() string { + fn := e.ErrorFormat + if fn == nil { + fn = ListFormatFunc + } + + return fn(e.Errors) +} + +// ErrorOrNil returns an error interface if this Error represents +// a list of errors, or returns nil if the list of errors is empty. This +// function is useful at the end of accumulation to make sure that the value +// returned represents the existence of errors. +func (e *Error) ErrorOrNil() error { + if e == nil { + return nil + } + if len(e.Errors) == 0 { + return nil + } + + return e +} + +func (e *Error) GoString() string { + return fmt.Sprintf("*%#v", *e) +} + +// WrappedErrors returns the list of errors that this Error is wrapping. +// It is an implementation of the errwrap.Wrapper interface so that +// multierror.Error can be used with that library. +// +// This method is not safe to be called concurrently and is no different +// than accessing the Errors field directly. It is implemented only to +// satisfy the errwrap.Wrapper interface. 
+func (e *Error) WrappedErrors() []error { + return e.Errors +} + +// Unwrap returns an error from Error (or nil if there are no errors). +// This error returned will further support Unwrap to get the next error, +// etc. The order will match the order of Errors in the multierror.Error +// at the time of calling. +// +// The resulting error supports errors.As/Is/Unwrap so you can continue +// to use the stdlib errors package to introspect further. +// +// This will perform a shallow copy of the errors slice. Any errors appended +// to this error after calling Unwrap will not be available until a new +// Unwrap is called on the multierror.Error. +func (e *Error) Unwrap() error { + // If we have no errors then we do nothing + if e == nil || len(e.Errors) == 0 { + return nil + } + + // If we have exactly one error, we can just return that directly. + if len(e.Errors) == 1 { + return e.Errors[0] + } + + // Shallow copy the slice + errs := make([]error, len(e.Errors)) + copy(errs, e.Errors) + return chain(errs) +} + +// chain implements the interfaces necessary for errors.Is/As/Unwrap to +// work in a deterministic way with multierror. A chain tracks a list of +// errors while accounting for the current represented error. This lets +// Is/As be meaningful. +// +// Unwrap returns the next error. In the cleanest form, Unwrap would return +// the wrapped error here but we can't do that if we want to properly +// get access to all the errors. Instead, users are recommended to use +// Is/As to get the correct error type out. +// +// Precondition: []error is non-empty (len > 0) +type chain []error + +// Error implements the error interface +func (e chain) Error() string { + return e[0].Error() +} + +// Unwrap implements errors.Unwrap by returning the next error in the +// chain or nil if there are no more errors. +func (e chain) Unwrap() error { + if len(e) == 1 { + return nil + } + + return e[1:] +} + +// As implements errors.As by attempting to map to the current value. 
+func (e chain) As(target interface{}) bool { + return errors.As(e[0], target) +} + +// Is implements errors.Is by comparing the current value directly. +func (e chain) Is(target error) bool { + return errors.Is(e[0], target) +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/prefix.go b/src/control/vendor/github.com/hashicorp/go-multierror/prefix.go new file mode 100644 index 00000000000..5c477abe44f --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/prefix.go @@ -0,0 +1,37 @@ +package multierror + +import ( + "fmt" + + "github.com/hashicorp/errwrap" +) + +// Prefix is a helper function that will prefix some text +// to the given error. If the error is a multierror.Error, then +// it will be prefixed to each wrapped error. +// +// This is useful to use when appending multiple multierrors +// together in order to give better scoping. +func Prefix(err error, prefix string) error { + if err == nil { + return nil + } + + format := fmt.Sprintf("%s {{err}}", prefix) + switch err := err.(type) { + case *Error: + // Typed nils can reach here, so initialize if we are nil + if err == nil { + err = new(Error) + } + + // Wrap each of the errors + for i, e := range err.Errors { + err.Errors[i] = errwrap.Wrapf(format, e) + } + + return err + default: + return errwrap.Wrapf(format, err) + } +} diff --git a/src/control/vendor/github.com/hashicorp/go-multierror/sort.go b/src/control/vendor/github.com/hashicorp/go-multierror/sort.go new file mode 100644 index 00000000000..fecb14e81c5 --- /dev/null +++ b/src/control/vendor/github.com/hashicorp/go-multierror/sort.go @@ -0,0 +1,16 @@ +package multierror + +// Len implements sort.Interface function for length +func (err Error) Len() int { + return len(err.Errors) +} + +// Swap implements sort.Interface function for swapping elements +func (err Error) Swap(i, j int) { + err.Errors[i], err.Errors[j] = err.Errors[j], err.Errors[i] +} + +// Less implements sort.Interface function for determining 
order +func (err Error) Less(i, j int) bool { + return err.Errors[i].Error() < err.Errors[j].Error() +} diff --git a/src/control/vendor/modules.txt b/src/control/vendor/modules.txt index 214c82eefa1..0b5f3ddb599 100644 --- a/src/control/vendor/modules.txt +++ b/src/control/vendor/modules.txt @@ -11,6 +11,21 @@ github.com/beorn7/perks/quantile # github.com/cespare/xxhash/v2 v2.2.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2 +# github.com/desertbit/closer/v3 v3.1.2 +## explicit; go 1.12 +github.com/desertbit/closer/v3 +# github.com/desertbit/columnize v2.1.0+incompatible +## explicit +github.com/desertbit/columnize +# github.com/desertbit/go-shlex v0.1.1 +## explicit; go 1.14 +github.com/desertbit/go-shlex +# github.com/desertbit/grumble v1.1.3 +## explicit; go 1.12 +github.com/desertbit/grumble +# github.com/desertbit/readline v1.5.1 +## explicit; go 1.12 +github.com/desertbit/readline # github.com/dustin/go-humanize v1.0.0 ## explicit github.com/dustin/go-humanize @@ -37,6 +52,9 @@ github.com/google/go-cmp/cmp/internal/value # github.com/google/uuid v1.3.0 ## explicit github.com/google/uuid +# github.com/hashicorp/errwrap v1.1.0 +## explicit +github.com/hashicorp/errwrap # github.com/hashicorp/go-hclog v1.2.2 ## explicit; go 1.13 github.com/hashicorp/go-hclog @@ -46,6 +64,9 @@ github.com/hashicorp/go-immutable-radix # github.com/hashicorp/go-msgpack v1.1.5 ## explicit; go 1.13 github.com/hashicorp/go-msgpack/codec +# github.com/hashicorp/go-multierror v1.1.0 +## explicit; go 1.14 +github.com/hashicorp/go-multierror # github.com/hashicorp/go-uuid v1.0.1 ## explicit # github.com/hashicorp/golang-lru v0.5.4 diff --git a/src/ddb/README.md b/src/ddb/README.md new file mode 100644 index 00000000000..58ceabf9098 --- /dev/null +++ b/src/ddb/README.md @@ -0,0 +1,107 @@ +# DAOS Debug Tool (ddb) + +## Description + +The DAOS Debug Tool (ddb) allows a user to navigate through a file in the VOS +format. 
It is similar to debugfs for ext2/3/4 and offers both a command line and +interactive shell mode. + +## Design + +DDB is developed on top of the VOS API, which already supports interacting +with a VOS file. The vos_iterate API is heavily used to iterate and navigate +over a VOS tree. Function tables/pointers are used quite a bit as well for +injecting callbacks to vos_iterate which already uses a callback approach. It +also helps support unit testing of the different layers. The user interface is +written in golang to support a richer interactive shell and CLI experience. The +golang code wraps C functions which do the heavy lifting of the command. + +### Layers + +The primary layers for the application are: + +#### CLI / User interface + +The golang interface handles parsing most of the user input. The +github.com/jessevdk/go-flags module handles the user input from the command +line. This includes determining if the -R and -f options are passed and if a +path to a vos file was supplied. + +The github.com/desertbit/grumble module handles the execution of the commands, +whether from interactive mode or from the values of -R or -f. It also supplies +the interactive mode, managing history, input keys, etc. + +The golang code also calls the C functions to initialize DAOS and VOS. + +#### ddb commands (sub commands) + +The implementation of the individual commands that ddb supports. It +receives a command's options/arguments as a well-defined structure (fields of +which are set by ddb). It interacts with a ddb/vos adapter layer for using the +VOS API. + +#### ddb vos (dv_) + +This layer adapts the needs of the ddb commands to the current VOS API +implementation, making the VOS interaction a bit nicer for ddb. + +# Help and Usage + +``` +$ ddb -h +Usage: + ddb [OPTIONS] [<vos_file_path>] + +The DAOS Debug Tool (ddb) allows a user to navigate through and modify +a file in the VOS format. It offers both a command line and interactive +shell mode.
If the '-R' or '-f' options are not provided, then it will +run in interactive mode. In order to modify the file, the '-w' option +must be included. The optional <vos_file_path> will be opened before running +commands supplied by '-R' or '-f' or entering interactive mode. + +Application Options: + -R, --run_cmd=<cmd> Execute the single command <cmd>, then exit + -f, --file_cmd=<path> Path to a file containing a list of ddb commands, one + command per line, then exit. + -w, --write_mode Open the vos file in write mode. + +Help Options: + -h, --help Show this help message +``` + +Interactive mode help +``` +$ help + +The DAOS Debug Tool (ddb) allows a user to navigate through and modify +a file in the VOS format. In order to modify the file, the '-w' option must +be included when opening the vos file. + +Many of the commands take a vos tree path. The format for this path +is 'cont_uuid/obj_id/dkey/akey/recx'. The keys currently only support string +keys. The recx for array values is the format {lo-hi}. To make it easier to +navigate the tree, indexes can be used instead of the path part. The index +is in the format '[i]', for example '[0]/[0]/[0]' + +Commands: + clear clear the screen + clear_cmt_dtx Clear the dtx committed table + close Close the currently opened vos pool shard + commit_ilog Process the ilog + dtx_abort Mark the active dtx entry as aborted + dtx_commit Mark the active dtx entry as committed + dump_dtx Dump the dtx tables + dump_ilog Dump the ilog + dump_superblock Dump the pool superblock information + dump_value Dump a value to a file + dump_vea Dump information from the vea about free regions + exit exit the shell + help use 'help [command]' for command help + load Load a value to a vos path. + ls List containers, objects, dkeys, akeys, and values + open Opens the vos file at <path> + rm Remove a branch of the VOS tree. + rm_ilog Remove all the ilog entries + smd_sync Restore the SMD file with backup from blob + update_vea Alter the VEA tree to mark a region as free.
+``` \ No newline at end of file diff --git a/src/ddb/SConscript b/src/ddb/SConscript new file mode 100644 index 00000000000..3fd7878f494 --- /dev/null +++ b/src/ddb/SConscript @@ -0,0 +1,61 @@ +"""Build tests""" + + +def scons(): + """Execute build""" + Import('env', 'prereqs') + + if not prereqs.server_requested(): + return + + denv = env.Clone() + + libs = ['vos', 'daos_common_pmem', 'abt', 'gurt', 'uuid', 'bio', 'cart'] + # spdk libraries + libs += ['spdk_event', 'spdk_log'] + libs += ['spdk_bdev', 'spdk_blob', 'spdk_blob_bdev', 'spdk_json'] + libs += ['spdk_nvme', 'spdk_init', 'spdk_thread', 'spdk_log'] + libs += ['spdk_env_dpdk', 'spdk_thread', 'spdk_bdev', 'rte_mempool'] + libs += ['rte_mempool_ring', 'rte_bus_pci', 'rte_pci', 'rte_ring'] + libs += ['rte_mbuf', 'rte_eal', 'rte_kvargs', 'spdk_bdev_aio'] + libs += ['spdk_bdev_nvme', 'spdk_blob', 'spdk_nvme', 'spdk_util'] + libs += ['spdk_json', 'spdk_jsonrpc', 'spdk_rpc', 'spdk_trace'] + libs += ['spdk_sock', 'spdk_log', 'spdk_notify', 'spdk_blob_bdev'] + libs += ['spdk_vmd', 'spdk_event_bdev', 'spdk_init', 'rte_power'] + + src = ['ddb.c', + 'ddb_commands.c', + 'ddb_main.c', + 'ddb_parse.c', + 'ddb_tree_path.c', + 'ddb_printer.c', + 'ddb_vos.c', + 'ddb_spdk.c'] + + # Add runtime paths for daos libraries + denv.AppendUnique(LIBPATH=[Dir('.')]) + denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64']) + + denv.Append(CPPDEFINES={'DAOS_PMEM_BUILD': '1'}) + + # Because ddb is so heavily dependent on VOS, allow it to have some internal + # knowledge of it. 
+ denv.AppendUnique(CPPPATH=[Dir('../vos/').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('../vos/')]) + # Also need some internal knowledge of bio to read the bio header from the + # blob stores + denv.AppendUnique(CPPPATH=[Dir('../bio/').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('../bio/')]) + + prereqs.require(denv, 'argobots', 'protobufc', 'pmdk', 'spdk') + + ddblib = denv.d_library('ddb', src, LIBS=libs) + denv.Install('$PREFIX/lib64/daos_srv/', ddblib) + + # tests + SConscript('tests/SConscript', exports=['denv']) + + +if __name__ == "SCons.Script": + scons() diff --git a/src/ddb/ddb.c b/src/ddb/ddb.c new file mode 100644 index 00000000000..8acb0bd60af --- /dev/null +++ b/src/ddb/ddb.c @@ -0,0 +1,1100 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include + +#include "ddb.h" +#include "ddb_common.h" +#include "ddb_parse.h" + +#define MAX_COMMAND_LEN 1024 + +#define same(a, b) (strcmp((a), (b)) == 0) +#define COMMAND_NAME_HELP "help" +#define COMMAND_NAME_QUIT "quit" +#define COMMAND_NAME_LS "ls" +#define COMMAND_NAME_OPEN "open" +#define COMMAND_NAME_VERSION "version" +#define COMMAND_NAME_CLOSE "close" +#define COMMAND_NAME_SUPERBLOCK_DUMP "superblock_dump" +#define COMMAND_NAME_VALUE_DUMP "value_dump" +#define COMMAND_NAME_RM "rm" +#define COMMAND_NAME_VALUE_LOAD "value_load" +#define COMMAND_NAME_ILOG_DUMP "ilog_dump" +#define COMMAND_NAME_ILOG_COMMIT "ilog_commit" +#define COMMAND_NAME_ILOG_CLEAR "ilog_clear" +#define COMMAND_NAME_DTX_DUMP "dtx_dump" +#define COMMAND_NAME_DTX_CMT_CLEAR "dtx_cmt_clear" +#define COMMAND_NAME_SMD_SYNC "smd_sync" +#define COMMAND_NAME_VEA_DUMP "vea_dump" +#define COMMAND_NAME_VEA_UPDATE "vea_update" +#define COMMAND_NAME_DTX_ACT_COMMIT "dtx_act_commit" +#define COMMAND_NAME_DTX_ACT_ABORT "dtx_act_abort" + +/* Parse command line options for the 'ls' command */ +static int +ls_option_parse(struct ddb_ctx *ctx, struct ls_options *cmd_args, 
+ uint32_t argc, char **argv) +{ + char *options_short = "rd"; + int index = 0, opt; + struct option options_long[] = { + { "recursive", no_argument, NULL, 'r' }, + { "details", no_argument, NULL, 'd' }, + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + while ((opt = getopt_long(argc, argv, options_short, options_long, &index)) != -1) { + switch (opt) { + case 'r': + cmd_args->recursive = true; + break; + case 'd': + cmd_args->details = true; + break; + case '?': + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + default: + return -DER_INVAL; + } + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'open' command */ +static int +open_option_parse(struct ddb_ctx *ctx, struct open_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = "w"; + int index = 0, opt; + struct option options_long[] = { + { "write_mode", no_argument, NULL, 'w' }, + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + while ((opt = getopt_long(argc, argv, options_short, options_long, &index)) != -1) { + switch (opt) { + case 'w': + cmd_args->write_mode = true; + break; + case '?': + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + default: + return -DER_INVAL; + } + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'value_dump' command */ +static int +value_dump_option_parse(struct ddb_ctx *ctx, struct value_dump_options *cmd_args, + 
uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + if (argc - index > 0) { + cmd_args->dst = argv[index]; + index++; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'rm' command */ +static int +rm_option_parse(struct ddb_ctx *ctx, struct rm_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'value_load' command */ +static int +value_load_option_parse(struct ddb_ctx *ctx, struct value_load_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, 
options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->src = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'src'\n"); + return -DER_INVAL; + } + if (argc - index > 0) { + cmd_args->dst = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'dst'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'ilog_dump' command */ +static int +ilog_dump_option_parse(struct ddb_ctx *ctx, struct ilog_dump_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'ilog_commit' command */ +static int +ilog_commit_option_parse(struct ddb_ctx *ctx, struct ilog_commit_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index 
> 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'ilog_clear' command */ +static int +ilog_clear_option_parse(struct ddb_ctx *ctx, struct ilog_clear_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'dtx_dump' command */ +static int +dtx_dump_option_parse(struct ddb_ctx *ctx, struct dtx_dump_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = "ac"; + int index = 0, opt; + struct option options_long[] = { + { "active", no_argument, NULL, 'a' }, + { "committed", no_argument, NULL, 'c' }, + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + while ((opt = getopt_long(argc, argv, options_short, options_long, &index)) != -1) { + switch (opt) { + case 'a': + cmd_args->active = true; + break; + case 'c': + cmd_args->committed = true; + break; + case '?': + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + default: + return -DER_INVAL; + } + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + 
ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'dtx_cmt_clear' command */ +static int +dtx_cmt_clear_option_parse(struct ddb_ctx *ctx, struct dtx_cmt_clear_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'smd_sync' command */ +static int +smd_sync_option_parse(struct ddb_ctx *ctx, struct smd_sync_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->nvme_conf = argv[index]; + index++; + } + if (argc - index > 0) { + cmd_args->db_path = argv[index]; + index++; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'vea_update' command */ +static int 
+vea_update_option_parse(struct ddb_ctx *ctx, struct vea_update_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->offset = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'offset'\n"); + return -DER_INVAL; + } + if (argc - index > 0) { + cmd_args->blk_cnt = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'blk_cnt'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'dtx_act_commit' command */ +static int +dtx_act_commit_option_parse(struct ddb_ctx *ctx, struct dtx_act_commit_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + if (argc - index > 0) { + cmd_args->dtx_id = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'dtx_id'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +/* Parse command line options for the 'dtx_act_abort' 
command */ +static int +dtx_act_abort_option_parse(struct ddb_ctx *ctx, struct dtx_act_abort_options *cmd_args, + uint32_t argc, char **argv) +{ + char *options_short = ""; + int index = 0; + struct option options_long[] = { + { NULL } + }; + + memset(cmd_args, 0, sizeof(*cmd_args)); + + /* Restart getopt */ + optind = 1; + opterr = 0; + if (getopt_long(argc, argv, options_short, options_long, &index) != -1) { + ddb_printf(ctx, "Unknown option: '%c'\n", optopt); + return -DER_INVAL; + } + + index = optind; + if (argc - index > 0) { + cmd_args->path = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'path'\n"); + return -DER_INVAL; + } + if (argc - index > 0) { + cmd_args->dtx_id = argv[index]; + index++; + } else { + ddb_print(ctx, "Expected argument 'dtx_id'\n"); + return -DER_INVAL; + } + + if (argc - index > 0) { + ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]); + return -DER_INVAL; + } + + return 0; +} + +int +ddb_parse_cmd_args(struct ddb_ctx *ctx, uint32_t argc, char **argv, struct ddb_cmd_info *info) +{ + char *cmd = argv[0]; + + if (same(cmd, COMMAND_NAME_HELP)) { + info->dci_cmd = DDB_CMD_HELP; + return 0; + } + if (same(cmd, COMMAND_NAME_QUIT)) { + info->dci_cmd = DDB_CMD_QUIT; + return 0; + } + if (same(cmd, COMMAND_NAME_LS)) { + info->dci_cmd = DDB_CMD_LS; + return ls_option_parse(ctx, &info->dci_cmd_option.dci_ls, + argc, argv); + } + if (same(cmd, COMMAND_NAME_OPEN)) { + info->dci_cmd = DDB_CMD_OPEN; + return open_option_parse(ctx, &info->dci_cmd_option.dci_open, + argc, argv); + } + if (same(cmd, COMMAND_NAME_VERSION)) { + info->dci_cmd = DDB_CMD_VERSION; + return 0; + } + if (same(cmd, COMMAND_NAME_CLOSE)) { + info->dci_cmd = DDB_CMD_CLOSE; + return 0; + } + if (same(cmd, COMMAND_NAME_SUPERBLOCK_DUMP)) { + info->dci_cmd = DDB_CMD_SUPERBLOCK_DUMP; + return 0; + } + if (same(cmd, COMMAND_NAME_VALUE_DUMP)) { + info->dci_cmd = DDB_CMD_VALUE_DUMP; + return value_dump_option_parse(ctx, &info->dci_cmd_option.dci_value_dump, 
+ argc, argv); + } + if (same(cmd, COMMAND_NAME_RM)) { + info->dci_cmd = DDB_CMD_RM; + return rm_option_parse(ctx, &info->dci_cmd_option.dci_rm, + argc, argv); + } + if (same(cmd, COMMAND_NAME_VALUE_LOAD)) { + info->dci_cmd = DDB_CMD_VALUE_LOAD; + return value_load_option_parse(ctx, &info->dci_cmd_option.dci_value_load, + argc, argv); + } + if (same(cmd, COMMAND_NAME_ILOG_DUMP)) { + info->dci_cmd = DDB_CMD_ILOG_DUMP; + return ilog_dump_option_parse(ctx, &info->dci_cmd_option.dci_ilog_dump, + argc, argv); + } + if (same(cmd, COMMAND_NAME_ILOG_COMMIT)) { + info->dci_cmd = DDB_CMD_ILOG_COMMIT; + return ilog_commit_option_parse(ctx, &info->dci_cmd_option.dci_ilog_commit, + argc, argv); + } + if (same(cmd, COMMAND_NAME_ILOG_CLEAR)) { + info->dci_cmd = DDB_CMD_ILOG_CLEAR; + return ilog_clear_option_parse(ctx, &info->dci_cmd_option.dci_ilog_clear, + argc, argv); + } + if (same(cmd, COMMAND_NAME_DTX_DUMP)) { + info->dci_cmd = DDB_CMD_DTX_DUMP; + return dtx_dump_option_parse(ctx, &info->dci_cmd_option.dci_dtx_dump, + argc, argv); + } + if (same(cmd, COMMAND_NAME_DTX_CMT_CLEAR)) { + info->dci_cmd = DDB_CMD_DTX_CMT_CLEAR; + return dtx_cmt_clear_option_parse(ctx, &info->dci_cmd_option.dci_dtx_cmt_clear, + argc, argv); + } + if (same(cmd, COMMAND_NAME_SMD_SYNC)) { + info->dci_cmd = DDB_CMD_SMD_SYNC; + return smd_sync_option_parse(ctx, &info->dci_cmd_option.dci_smd_sync, + argc, argv); + } + if (same(cmd, COMMAND_NAME_VEA_DUMP)) { + info->dci_cmd = DDB_CMD_VEA_DUMP; + return 0; + } + if (same(cmd, COMMAND_NAME_VEA_UPDATE)) { + info->dci_cmd = DDB_CMD_VEA_UPDATE; + return vea_update_option_parse(ctx, &info->dci_cmd_option.dci_vea_update, + argc, argv); + } + if (same(cmd, COMMAND_NAME_DTX_ACT_COMMIT)) { + info->dci_cmd = DDB_CMD_DTX_ACT_COMMIT; + return dtx_act_commit_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act_commit, + argc, argv); + } + if (same(cmd, COMMAND_NAME_DTX_ACT_ABORT)) { + info->dci_cmd = DDB_CMD_DTX_ACT_ABORT; + return dtx_act_abort_option_parse(ctx, 
&info->dci_cmd_option.dci_dtx_act_abort,
+						  argc, argv);
+	}
+
+	ddb_errorf(ctx, "'%s' is not a valid command. Available commands are:"
+			"'help', "
+			"'quit', "
+			"'ls', "
+			"'open', "
+			"'version', "
+			"'close', "
+			"'superblock_dump', "
+			"'value_dump', "
+			"'rm', "
+			"'value_load', "
+			"'ilog_dump', "
+			"'ilog_commit', "
+			"'ilog_clear', "
+			"'dtx_dump', "
+			"'dtx_cmt_clear', "
+			"'smd_sync', "
+			"'vea_dump', "
+			"'vea_update', "
+			"'dtx_act_commit', "
+			"'dtx_act_abort'\n", cmd);
+
+	return -DER_INVAL;
+}
+
+/**
+ * Parse one command line and run the matching ddb command.
+ *
+ * A private copy of cmd_str (bounded by MAX_COMMAND_LEN) is made, one trailing
+ * newline is stripped, and the copy is split into argv form. The result is
+ * handed to ddb_parse_cmd_args() and then dispatched to the ddb_run_*() handler
+ * selected by the parsed command. The copy and the parsed argv are released on
+ * every exit path that is reached after they were created.
+ *
+ * @param ctx		ddb context passed to the parser and to the handler
+ * @param cmd_str	Command line to run
+ * @param write_mode	Not referenced by this function
+ * @return		-DER_NOMEM if the copy cannot be allocated, -DER_INVAL if
+ *			nothing was parsed or the command is unknown, otherwise
+ *			the value returned by parsing or by the command handler
+ */
+int
+ddb_run_cmd(struct ddb_ctx *ctx, const char *cmd_str, bool write_mode)
+{
+	struct argv_parsed	 parse_args = {0};
+	struct ddb_cmd_info	 info = {0};
+	int			 rc;
+	char			*cmd_copy;
+	size_t			 len;
+
+	D_STRNDUP(cmd_copy, cmd_str, MAX_COMMAND_LEN);
+
+	if (cmd_copy == NULL)
+		return -DER_NOMEM;
+
+	/* Remove newline if needed. An empty command has no last character to look at. */
+	len = strlen(cmd_copy);
+	if (len > 0 && cmd_copy[len - 1] == '\n')
+		cmd_copy[len - 1] = '\0';
+
+	rc = ddb_str2argv_create(cmd_copy, &parse_args);
+	if (!SUCCESS(rc))
+		D_GOTO(done, rc);
+
+	if (parse_args.ap_argc == 0) {
+		D_ERROR("Nothing parsed\n");
+		/* Leave through 'done' so cmd_copy and parse_args are released */
+		D_GOTO(done, rc = -DER_INVAL);
+	}
+
+	rc = ddb_parse_cmd_args(ctx, parse_args.ap_argc, parse_args.ap_argv, &info);
+	if (!SUCCESS(rc))
+		D_GOTO(done, rc);
+
+	switch (info.dci_cmd) {
+
+	case DDB_CMD_HELP:
+		rc = ddb_run_help(ctx);
+		break;
+
+	case DDB_CMD_QUIT:
+		rc = ddb_run_quit(ctx);
+		break;
+
+	case DDB_CMD_LS:
+		rc = ddb_run_ls(ctx, &info.dci_cmd_option.dci_ls);
+		break;
+
+	case DDB_CMD_OPEN:
+		rc = ddb_run_open(ctx, &info.dci_cmd_option.dci_open);
+		break;
+
+	case DDB_CMD_VERSION:
+		rc = ddb_run_version(ctx);
+		break;
+
+	case DDB_CMD_CLOSE:
+		rc = ddb_run_close(ctx);
+		break;
+
+	case DDB_CMD_SUPERBLOCK_DUMP:
+		rc = ddb_run_superblock_dump(ctx);
+		break;
+
+	case DDB_CMD_VALUE_DUMP:
+		rc = ddb_run_value_dump(ctx, &info.dci_cmd_option.dci_value_dump);
+		break;
+
+	case DDB_CMD_RM:
+		rc = ddb_run_rm(ctx, &info.dci_cmd_option.dci_rm);
+		break;
+
+	case DDB_CMD_VALUE_LOAD:
+		rc = ddb_run_value_load(ctx, &info.dci_cmd_option.dci_value_load);
+		break;
+
+	case DDB_CMD_ILOG_DUMP:
+		rc = ddb_run_ilog_dump(ctx, &info.dci_cmd_option.dci_ilog_dump);
+		break;
+
+	case DDB_CMD_ILOG_COMMIT:
+		rc = ddb_run_ilog_commit(ctx, &info.dci_cmd_option.dci_ilog_commit);
+		break;
+
+	case DDB_CMD_ILOG_CLEAR:
+		rc = ddb_run_ilog_clear(ctx, &info.dci_cmd_option.dci_ilog_clear);
+		break;
+
+	case DDB_CMD_DTX_DUMP:
+		rc = ddb_run_dtx_dump(ctx, &info.dci_cmd_option.dci_dtx_dump);
+		break;
+
+	case DDB_CMD_DTX_CMT_CLEAR:
+		rc = ddb_run_dtx_cmt_clear(ctx, &info.dci_cmd_option.dci_dtx_cmt_clear);
+		break;
+
+	case DDB_CMD_SMD_SYNC:
+		rc = ddb_run_smd_sync(ctx, &info.dci_cmd_option.dci_smd_sync);
+		break;
+
+	case DDB_CMD_VEA_DUMP:
+		rc = ddb_run_vea_dump(ctx);
+		break;
+
+	case DDB_CMD_VEA_UPDATE:
+		rc = ddb_run_vea_update(ctx, &info.dci_cmd_option.dci_vea_update);
+		break;
+
+	case DDB_CMD_DTX_ACT_COMMIT:
+		rc = ddb_run_dtx_act_commit(ctx, &info.dci_cmd_option.dci_dtx_act_commit);
+		break;
+
+	case DDB_CMD_DTX_ACT_ABORT:
+		rc = ddb_run_dtx_act_abort(ctx, &info.dci_cmd_option.dci_dtx_act_abort);
+		break;
+
+	case DDB_CMD_UNKNOWN:
+		ddb_error(ctx, "Unknown command\n");
+		rc = -DER_INVAL;
+		break;
+	}
+done:
+	ddb_str2argv_free(&parse_args);
+	D_FREE(cmd_copy);
+
+	return rc;
+}
+
+
+void
+ddb_commands_help(struct ddb_ctx *ctx)
+{
+	/* Command: help */
+	ddb_print(ctx, "help\n");
+	ddb_print(ctx, "\tShow help message for all the commands.\n");
+	ddb_print(ctx, "\n");
+
+	/* Command: quit */
+	ddb_print(ctx, "quit\n");
+	ddb_print(ctx, "\tQuit interactive mode\n");
+	ddb_print(ctx, "\n");
+
+	/* Command: ls */
+	ddb_print(ctx, "ls [path]\n");
+	ddb_print(ctx, "\tList containers, objects, dkeys, akeys, and values\n");
+	ddb_print(ctx, " [path]\n");
+	ddb_print(ctx, "\tOptional, list contents of the provided path\n");
+	ddb_print(ctx, "Options:\n");
+	ddb_print(ctx, " -r, --recursive\n");
+	ddb_print(ctx, "\tRecursively list the contents of the path\n");
+	ddb_print(ctx, " -d, --details\n");
+
ddb_print(ctx, "\tList more details of items in path\n"); + ddb_print(ctx, "\n"); + + /* Command: open */ + ddb_print(ctx, "open \n"); + ddb_print(ctx, "\tOpens the vos file at \n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tPath to the vos file to open.\n"); + ddb_print(ctx, "Options:\n"); + ddb_print(ctx, " -w, --write_mode\n"); + ddb_print(ctx, "\tOpen the vos file in write mode.\n"); + ddb_print(ctx, "\n"); + + /* Command: version */ + ddb_print(ctx, "version\n"); + ddb_print(ctx, "\tPrint ddb version\n"); + ddb_print(ctx, "\n"); + + /* Command: close */ + ddb_print(ctx, "close\n"); + ddb_print(ctx, "\tClose the currently opened vos pool shard\n"); + ddb_print(ctx, "\n"); + + /* Command: superblock_dump */ + ddb_print(ctx, "superblock_dump\n"); + ddb_print(ctx, "\tDump the pool superblock information\n"); + ddb_print(ctx, "\n"); + + /* Command: value_dump */ + ddb_print(ctx, "value_dump [dst]\n"); + ddb_print(ctx, "\tDump a value\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to dump.\n"); + ddb_print(ctx, " [dst]\n"); + ddb_print(ctx, "\tFile path to dump the value to.\n"); + ddb_print(ctx, "\n"); + + /* Command: rm */ + ddb_print(ctx, "rm \n"); + ddb_print(ctx, "\tRemove a branch of the VOS tree.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to remove.\n"); + ddb_print(ctx, "\n"); + + /* Command: value_load */ + ddb_print(ctx, "value_load \n"); + ddb_print(ctx, "\tLoad a value to a vos path.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tSource file path.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tDestination vos tree path to a value.\n"); + ddb_print(ctx, "\n"); + + /* Command: ilog_dump */ + ddb_print(ctx, "ilog_dump \n"); + ddb_print(ctx, "\tDump the ilog\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to an object, dkey, or akey.\n"); + ddb_print(ctx, "\n"); + + /* Command: ilog_commit */ + ddb_print(ctx, "ilog_commit \n"); + ddb_print(ctx, "\tProcess the ilog\n"); + ddb_print(ctx, " \n"); + 
ddb_print(ctx, "\tVOS tree path to an object, dkey, or akey.\n"); + ddb_print(ctx, "\n"); + + /* Command: ilog_clear */ + ddb_print(ctx, "ilog_clear \n"); + ddb_print(ctx, "\tRemove all the ilog entries\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to an object, dkey, or akey.\n"); + ddb_print(ctx, "\n"); + + /* Command: dtx_dump */ + ddb_print(ctx, "dtx_dump \n"); + ddb_print(ctx, "\tDump the dtx tables\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to a container.\n"); + ddb_print(ctx, "Options:\n"); + ddb_print(ctx, " -a, --active\n"); + ddb_print(ctx, "\tOnly dump entries from the active table\n"); + ddb_print(ctx, " -c, --committed\n"); + ddb_print(ctx, "\tOnly dump entries from the committed table\n"); + ddb_print(ctx, "\n"); + + /* Command: dtx_cmt_clear */ + ddb_print(ctx, "dtx_cmt_clear \n"); + ddb_print(ctx, "\tClear the dtx committed table\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to a container.\n"); + ddb_print(ctx, "\n"); + + /* Command: smd_sync */ + ddb_print(ctx, "smd_sync [nvme_conf] [db_path]\n"); + ddb_print(ctx, "\tRestore the SMD file with backup from blob\n"); + ddb_print(ctx, " [nvme_conf]\n"); + ddb_print(ctx, "\tPath to the nvme conf file. (default /mnt/daos/daos_nvme.conf)\n"); + ddb_print(ctx, " [db_path]\n"); + ddb_print(ctx, "\tPath to the vos db. 
(default /mnt/daos)\n"); + ddb_print(ctx, "\n"); + + /* Command: vea_dump */ + ddb_print(ctx, "vea_dump\n"); + ddb_print(ctx, "\tDump information from the vea about free regions\n"); + ddb_print(ctx, "\n"); + + /* Command: vea_update */ + ddb_print(ctx, "vea_update \n"); + ddb_print(ctx, "\tAlter the VEA tree to mark a region as free.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tBlock offset of the region to mark free.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tTotal blocks of the region to mark free.\n"); + ddb_print(ctx, "\n"); + + /* Command: dtx_act_commit */ + ddb_print(ctx, "dtx_act_commit \n"); + ddb_print(ctx, "\tMark the active dtx entry as committed\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to a container.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tDTX id of the entry to commit.\n"); + ddb_print(ctx, "\n"); + + /* Command: dtx_act_abort */ + ddb_print(ctx, "dtx_act_abort \n"); + ddb_print(ctx, "\tMark the active dtx entry as aborted\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tVOS tree path to a container.\n"); + ddb_print(ctx, " \n"); + ddb_print(ctx, "\tDTX id of the entry to abort.\n"); + ddb_print(ctx, "\n"); +} + +void +ddb_program_help(struct ddb_ctx *ctx) +{ + ddb_print(ctx, "The DAOS Debug Tool (ddb) allows a user to navigate through and modify\n" + "a file in the VOS format. It offers both a command line and interactive\n" + "shell mode. If the '-R' or '-f' options are not provided, then it will\n" + "run in interactive mode. In order to modify the file, the '-w' option\n" + "must be included.\n" + "\n" + "Many of the commands take a vos tree path. The format for this path\n" + "is [cont]/[obj]/[dkey]/[akey]/[extent]. The container is the container\n" + "uuid. The object is the object id. The keys parts currently only\n" + "support string keys and must be surrounded with a single quote (') unless\n" + "using indexes (explained later). The extent for array values is the\n" + "format {lo-hi}. 
To make it easier to navigate the tree, indexes can be\n" + "used instead of the path part. The index is in the format [i]\n"); + ddb_print(ctx, "\n"); + ddb_print(ctx, "Usage:\n"); + ddb_print(ctx, "ddb [path] [options]\n"); + ddb_print(ctx, "\n"); + ddb_print(ctx, " [path]\n"); + ddb_print(ctx, "\tPath to the vos file to open. This should be an absolute\n" + "\tpath to the pool shard. Part of the path is used to\n" + "\tdetermine what the pool uuid is. If a path is not provided\n" + "\tinitially, the open command can be used later to open the\n" + "\tvos file.\n"); + + ddb_print(ctx, "\nOptions:\n"); + ddb_print(ctx, " -w, --write_mode\n"); + ddb_print(ctx, "\tOpen the vos file in write mode. This allows for modifying\n" + "\tVOS file with the rm, load,\n" + "\tcommit_ilog, etc commands.\n"); + ddb_print(ctx, " -R, --run_cmd \n"); + ddb_print(ctx, "\tExecute the single command , then exit.\n"); + ddb_print(ctx, " -f, --file_cmd \n"); + ddb_print(ctx, "\tPath to a file container a list of ddb commands, one command\n" + "\tper line, then exit.\n"); + ddb_print(ctx, " -h, --help\n"); + ddb_print(ctx, "\tShow tool usage.\n"); + + ddb_print(ctx, "Commands:\n"); + ddb_print(ctx, " help Show help message for all the commands.\n"); + ddb_print(ctx, " quit Quit interactive mode\n"); + ddb_print(ctx, " ls List containers, objects, dkeys, akeys, and values\n"); + ddb_print(ctx, " open Opens the vos file at \n"); + ddb_print(ctx, " version Print ddb version\n"); + ddb_print(ctx, " close Close the currently opened vos pool shard\n"); + ddb_print(ctx, " superblock_dump Dump the pool superblock information\n"); + ddb_print(ctx, " value_dump Dump a value\n"); + ddb_print(ctx, " rm Remove a branch of the VOS tree.\n"); + ddb_print(ctx, " value_load Load a value to a vos path.\n"); + ddb_print(ctx, " ilog_dump Dump the ilog\n"); + ddb_print(ctx, " ilog_commit Process the ilog\n"); + ddb_print(ctx, " ilog_clear Remove all the ilog entries\n"); + ddb_print(ctx, " dtx_dump Dump the 
dtx tables\n"); + ddb_print(ctx, " dtx_cmt_clear Clear the dtx committed table\n"); + ddb_print(ctx, " smd_sync Restore the SMD file with backup from blob\n"); + ddb_print(ctx, " vea_dump Dump information from the vea about free regions\n"); + ddb_print(ctx, " vea_update Alter the VEA tree to mark a region as free.\n"); + ddb_print(ctx, " dtx_act_commit Mark the active dtx entry as committed\n"); + ddb_print(ctx, " dtx_act_abort Mark the active dtx entry as aborted\n"); +} \ No newline at end of file diff --git a/src/ddb/ddb.h b/src/ddb/ddb.h new file mode 100644 index 00000000000..5cae879368f --- /dev/null +++ b/src/ddb/ddb.h @@ -0,0 +1,236 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#ifndef __DDB_RUN_CMDS_H +#define __DDB_RUN_CMDS_H + + +#include + +typedef int (*ddb_io_line_cb)(void *cb_args, char *line, uint32_t str_len); + +struct ddb_io_ft { + /** + * Print a message. + * + * @param fmt Typically printf string format + * @param ... Additional args will be formatted into the printed string + * @return Total number of characters written + */ + int (*ddb_print_message)(const char *fmt, ...); + + /** + * Print an error message. + * + * @param fmt Typically printf string format + * @param ... Additional args will be formatted into the printed string + * @return Total number of characters written + */ + int (*ddb_print_error)(const char *fmt, ...); + + /** + * Read a line from stdin and stores into buf. 
+ * + * @param buf Pointer to an array where the string read is stored + * @param buf_len Length of buf + * @return On success the same buf parameter, else NULL + */ + char *(*ddb_get_input)(char *buf, uint32_t buf_len); + + /** + * Check if a file exists + * + * @param path Path to file to check + * @return true if the file exists, else false + */ + bool (*ddb_get_file_exists)(const char *path); + + /** + * Write the contents of the iov to a file + * + * @param dst_path File to write to + * @param contents Contents to be written + * @return 0 on success, else an error code + */ + int (*ddb_write_file)(const char *dst_path, d_iov_t *contents); + + /** + * Determine the size of a file at path + * @param path Path of file to check + * @return the size of the file at path in bytes + */ + size_t (*ddb_get_file_size)(const char *path); + + /** + * Read the contents of a file and store into the iov + * @param src_path Path of the file to read + * @param contents Where to load the contents of the file into + * @return number of bytes read from the src_path + */ + size_t (*ddb_read_file)(const char *src_path, d_iov_t *contents); + + /** + * Read contents of a file line by line. For each line, the line_cb will be called. 
+ * @param path Path of the file to read + * @param line_cb Callback function used for each line + * @param cb_args Caller arguments passed to the callback function + * @return 0 on success, else an error code + */ + int (*ddb_get_lines)(const char *path, ddb_io_line_cb line_cb, void *cb_args); +}; + +struct ddb_ctx { + struct ddb_io_ft dc_io_ft; + daos_handle_t dc_poh; + bool dc_should_quit; + bool dc_write_mode; +}; + +void ddb_ctx_init(struct ddb_ctx *ctx); +int ddb_init(void); +void ddb_fini(void); + +enum ddb_cmd { + DDB_CMD_UNKNOWN = 0, + DDB_CMD_HELP = 1, + DDB_CMD_QUIT = 2, + DDB_CMD_LS = 3, + DDB_CMD_OPEN = 4, + DDB_CMD_VERSION = 5, + DDB_CMD_CLOSE = 6, + DDB_CMD_SUPERBLOCK_DUMP = 7, + DDB_CMD_VALUE_DUMP = 8, + DDB_CMD_RM = 9, + DDB_CMD_VALUE_LOAD = 10, + DDB_CMD_ILOG_DUMP = 11, + DDB_CMD_ILOG_COMMIT = 12, + DDB_CMD_ILOG_CLEAR = 13, + DDB_CMD_DTX_DUMP = 14, + DDB_CMD_DTX_CMT_CLEAR = 15, + DDB_CMD_SMD_SYNC = 16, + DDB_CMD_VEA_DUMP = 17, + DDB_CMD_VEA_UPDATE = 18, + DDB_CMD_DTX_ACT_COMMIT = 19, + DDB_CMD_DTX_ACT_ABORT = 20, +}; + +/* option and argument structures for commands that need them */ +struct ls_options { + bool recursive; + bool details; + char *path; +}; + +struct open_options { + bool write_mode; + char *path; +}; + +struct value_dump_options { + char *path; + char *dst; +}; + +struct rm_options { + char *path; +}; + +struct value_load_options { + char *src; + char *dst; +}; + +struct ilog_dump_options { + char *path; +}; + +struct ilog_commit_options { + char *path; +}; + +struct ilog_clear_options { + char *path; +}; + +struct dtx_dump_options { + bool active; + bool committed; + char *path; +}; + +struct dtx_cmt_clear_options { + char *path; +}; + +struct smd_sync_options { + char *nvme_conf; + char *db_path; +}; + +struct vea_update_options { + char *offset; + char *blk_cnt; +}; + +struct dtx_act_commit_options { + char *path; + char *dtx_id; +}; + +struct dtx_act_abort_options { + char *path; + char *dtx_id; +}; + +struct ddb_cmd_info { + 
enum ddb_cmd dci_cmd; + union { + struct ls_options dci_ls; + struct open_options dci_open; + struct value_dump_options dci_value_dump; + struct rm_options dci_rm; + struct value_load_options dci_value_load; + struct ilog_dump_options dci_ilog_dump; + struct ilog_commit_options dci_ilog_commit; + struct ilog_clear_options dci_ilog_clear; + struct dtx_dump_options dci_dtx_dump; + struct dtx_cmt_clear_options dci_dtx_cmt_clear; + struct smd_sync_options dci_smd_sync; + struct vea_update_options dci_vea_update; + struct dtx_act_commit_options dci_dtx_act_commit; + struct dtx_act_abort_options dci_dtx_act_abort; + } dci_cmd_option; +}; + +int ddb_parse_cmd_args(struct ddb_ctx *ctx, uint32_t argc, char **argv, struct ddb_cmd_info *info); +int ddb_run_cmd(struct ddb_ctx *ctx, const char *cmd_str, bool write_mode); +/* Run commands ... */ +int ddb_run_help(struct ddb_ctx *ctx); +int ddb_run_quit(struct ddb_ctx *ctx); +int ddb_run_ls(struct ddb_ctx *ctx, struct ls_options *opt); +bool ddb_pool_is_open(struct ddb_ctx *ctx); +int ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt); +int ddb_run_version(struct ddb_ctx *ctx); +int ddb_run_close(struct ddb_ctx *ctx); +int ddb_run_superblock_dump(struct ddb_ctx *ctx); +int ddb_run_value_dump(struct ddb_ctx *ctx, struct value_dump_options *opt); +int ddb_run_rm(struct ddb_ctx *ctx, struct rm_options *opt); +int ddb_run_value_load(struct ddb_ctx *ctx, struct value_load_options *opt); +int ddb_run_ilog_dump(struct ddb_ctx *ctx, struct ilog_dump_options *opt); +int ddb_run_ilog_commit(struct ddb_ctx *ctx, struct ilog_commit_options *opt); +int ddb_run_ilog_clear(struct ddb_ctx *ctx, struct ilog_clear_options *opt); +int ddb_run_dtx_dump(struct ddb_ctx *ctx, struct dtx_dump_options *opt); +int ddb_run_dtx_cmt_clear(struct ddb_ctx *ctx, struct dtx_cmt_clear_options *opt); +int ddb_run_smd_sync(struct ddb_ctx *ctx, struct smd_sync_options *opt); +int ddb_run_vea_dump(struct ddb_ctx *ctx); +int ddb_run_vea_update(struct ddb_ctx 
*ctx, struct vea_update_options *opt); +int ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_commit_options *opt); +int ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_abort_options *opt); + + +void ddb_program_help(struct ddb_ctx *ctx); +void ddb_commands_help(struct ddb_ctx *ctx); + +#endif /* __DDB_RUN_CMDS_H */ diff --git a/src/ddb/ddb_commands.c b/src/ddb/ddb_commands.c new file mode 100644 index 00000000000..77fc72d9c17 --- /dev/null +++ b/src/ddb/ddb_commands.c @@ -0,0 +1,992 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include "ddb_common.h" +#include "ddb_parse.h" +#include "ddb.h" +#include "ddb_vos.h" +#include "ddb_printer.h" +#include "daos.h" +#include "ddb_tree_path.h" + +#define ilog_path_required_error_message "Path to object, dkey, or akey required\n" +#define error_msg_write_mode_only "Can only modify the VOS tree in 'write mode'\n" + +int +ddb_run_version(struct ddb_ctx *ctx) +{ + ddb_printf(ctx, "ddb version %d.%d.%d\n", + DAOS_VERSION_MAJOR, + DAOS_VERSION_MINOR, + DAOS_VERSION_FIX); + + return 0; +} + +int +ddb_run_help(struct ddb_ctx *ctx) +{ + ddb_commands_help(ctx); + + return 0; +} + +int +ddb_run_quit(struct ddb_ctx *ctx) +{ + ctx->dc_should_quit = true; + return 0; +} + +bool +ddb_pool_is_open(struct ddb_ctx *ctx) +{ + return daos_handle_is_valid(ctx->dc_poh); +} + +int +ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt) +{ + if (ddb_pool_is_open(ctx)) { + ddb_error(ctx, "Must close pool before can open another\n"); + return -DER_EXIST; + } + ctx->dc_write_mode = opt->write_mode; + return dv_pool_open(opt->path, &ctx->dc_poh); +} + +int +ddb_run_close(struct ddb_ctx *ctx) +{ + int rc; + + if (!ddb_pool_is_open(ctx)) { + ddb_error(ctx, "No pool open to close\n"); + return 0; + } + + rc = dv_pool_close(ctx->dc_poh); + ctx->dc_poh = DAOS_HDL_INVAL; + ctx->dc_write_mode = false; + + return rc; +} + +struct ls_ctx { + struct ddb_ctx 
*ctx; + bool has_cont; + bool has_obj; + bool has_dkey; + bool has_akey; + bool print_details; +}; + +#define DF_IDX "[%d]" +#define DP_IDX(idx) idx + +static int +init_path(struct ddb_ctx *ctx, char *path, struct dv_indexed_tree_path *itp) +{ + int rc; + + memset(itp, 0, sizeof(*itp)); + + rc = itp_parse(path, itp); + if (!SUCCESS(rc)) + return itp_handle_path_parse_error(ctx, rc); + + rc = dv_path_verify(ctx->dc_poh, itp); + if (!SUCCESS(rc)) + return itp_handle_path_parse_error(ctx, rc); + return 0; +} + +static int +ls_cont_handler(struct ddb_cont *cont, void *args) +{ + struct ls_ctx *ctx = args; + + ctx->has_cont = true; + if (ctx->print_details) + ddb_print_cont(ctx->ctx, cont); + else + ddb_print_path(ctx->ctx, cont->ddbc_path, 0); + + return 0; +} + +static int +ls_obj_handler(struct ddb_obj *obj, void *args) +{ + struct ls_ctx *ctx = args; + + ctx->has_obj = true; + if (ctx->print_details) + ddb_print_obj(ctx->ctx, obj, ctx->has_cont); + else + ddb_print_path(ctx->ctx, obj->ddbo_path, ctx->has_cont); + + return 0; +} + +static int +ls_dkey_handler(struct ddb_key *key, void *args) +{ + struct ls_ctx *ctx = args; + int indent = ctx->has_cont + ctx->has_obj; + + ctx->has_dkey = true; + if (ctx->print_details) + ddb_print_key(ctx->ctx, key, indent); + else + ddb_print_path(ctx->ctx, key->ddbk_path, indent); + + return 0; +} + +static int +ls_akey_handler(struct ddb_key *key, void *args) +{ + struct ls_ctx *ctx = args; + int indent = ctx->has_cont + ctx->has_obj + ctx->has_dkey; + + ctx->has_akey = true; + if (ctx->print_details) + ddb_print_key(ctx->ctx, key, indent); + else + ddb_print_path(ctx->ctx, key->ddbk_path, indent); + + return 0; +} + +static int +ls_sv_handler(struct ddb_sv *val, void *args) +{ + struct ls_ctx *ctx = args; + int indent = ctx->has_cont + ctx->has_obj + ctx->has_dkey + ctx->has_akey; + + if (ctx->print_details) + ddb_print_sv(ctx->ctx, val, indent); + else + ddb_print_path(ctx->ctx, val->ddbs_path, indent); + return 0; +} + +static 
int +ls_array_handler(struct ddb_array *val, void *args) +{ + struct ls_ctx *ctx = args; + int indent = ctx->has_cont + ctx->has_obj + ctx->has_dkey + ctx->has_akey; + + if (ctx->print_details) + ddb_print_array(ctx->ctx, val, indent); + else + ddb_print_path(ctx->ctx, val->ddba_path, indent); + return 0; +} + +static struct vos_tree_handlers handlers = { + .ddb_cont_handler = ls_cont_handler, + .ddb_obj_handler = ls_obj_handler, + .ddb_dkey_handler = ls_dkey_handler, + .ddb_akey_handler = ls_akey_handler, + .ddb_array_handler = ls_array_handler, + .ddb_sv_handler = ls_sv_handler, +}; + +int +ddb_run_ls(struct ddb_ctx *ctx, struct ls_options *opt) +{ + int rc; + struct dv_indexed_tree_path itp = {0}; + struct dv_tree_path vtp; + struct ls_ctx lsctx = {0}; + + if (daos_handle_is_inval(ctx->dc_poh)) { + ddb_error(ctx, "Not connected to a pool. Use 'open' to connect to a pool.\n"); + return -DER_NONEXIST; + } + rc = init_path(ctx, opt->path, &itp); + + if (!SUCCESS(rc)) + return rc; + + itp_to_vos_path(&itp, &vtp); + + ddb_print(ctx, "Listing contents of '"); + itp_print_full(ctx, &itp); + ddb_print(ctx, "'\n"); + if (!SUCCESS(ddb_vtp_verify(ctx->dc_poh, &vtp))) { + ddb_print(ctx, "Not a valid path\n"); + itp_free(&itp); + return -DER_NONEXIST; + } + + if (itp_has_recx_complete(&itp)) { + itp_free(&itp); + /* recx doesn't actually have anything under it. 
*/ + return 0; + } + lsctx.print_details = opt->details; + lsctx.ctx = ctx; + rc = dv_iterate(ctx->dc_poh, &vtp, opt->recursive, &handlers, &lsctx, &itp); + + itp_free(&itp); + + return rc; +} + +static int +print_superblock_cb(void *cb_arg, struct ddb_superblock *sb) +{ + struct ddb_ctx *ctx = cb_arg; + + ddb_print_superblock(ctx, sb); + + return 0; +} + +int +ddb_run_superblock_dump(struct ddb_ctx *ctx) +{ + int rc; + + rc = dv_superblock(ctx->dc_poh, print_superblock_cb, ctx); + + if (rc == -DER_DF_INVAL) + ddb_error(ctx, "Error with pool superblock"); + + return rc; +} + +struct dump_value_args { + struct ddb_ctx *dva_ctx; + struct dv_indexed_tree_path *dva_vtp; + char *dva_dst_path; +}; + +static int +print_value_cb(void *cb_args, d_iov_t *value) +{ + struct dump_value_args *args = cb_args; + struct ddb_ctx *ctx = args->dva_ctx; + char buf[256]; + + if (value->iov_len == 0) { + ddb_print(ctx, "No value at: "); + itp_print_full(ctx, args->dva_vtp); + ddb_print(ctx, "\n"); + return 0; + } + + ddb_iov_to_printable_buf(value, buf, ARRAY_SIZE(buf)); + ddb_printf(ctx, "Value (size: %lu):\n", value->iov_len); + ddb_printf(ctx, "%s\n", buf); + return 0; +} + +static int +write_file_value_cb(void *cb_args, d_iov_t *value) +{ + struct dump_value_args *args = cb_args; + struct ddb_ctx *ctx = args->dva_ctx; + + D_ASSERT(ctx->dc_io_ft.ddb_write_file); + + if (value->iov_len == 0) { + ddb_print(ctx, "No value at: "); + itp_print_full(ctx, args->dva_vtp); + ddb_print(ctx, "\n"); + + return 0; + } + + ddb_printf(ctx, "Dumping value (size: %lu) to: %s\n", + value->iov_len, args->dva_dst_path); + + return ctx->dc_io_ft.ddb_write_file(args->dva_dst_path, value); +} + +int +ddb_run_value_dump(struct ddb_ctx *ctx, struct value_dump_options *opt) +{ + struct dv_indexed_tree_path itp = {0}; + struct dv_tree_path vtp; + struct dump_value_args dva = {0}; + dv_dump_value_cb cb = NULL; + int rc; + + if (!opt->path) { + ddb_error(ctx, "A VOS path to dump is required.\n"); + return 
-DER_INVAL;
+	}
+
+	rc = init_path(ctx, opt->path, &itp);
+	if (!SUCCESS(rc))
+		return rc;
+
+	itp_print_full(ctx, &itp);
+	ddb_print(ctx, "\n");
+
+	if (!itp_has_value(&itp)) {
+		ddb_errorf(ctx, "Path [%s] is incomplete.\n", opt->path);
+		itp_free(&itp);
+		return -DDBER_INCOMPLETE_PATH_VALUE;
+	}
+
+	if (opt->dst && strlen(opt->dst) > 0)
+		cb = write_file_value_cb;
+	else
+		cb = print_value_cb;
+
+	dva.dva_dst_path = opt->dst;
+	dva.dva_ctx = ctx;
+	dva.dva_vtp = &itp;
+
+	itp_to_vos_path(&itp, &vtp);
+
+	rc = dv_dump_value(ctx->dc_poh, &vtp, cb, &dva);
+	itp_free(&itp);
+
+	return rc;
+}
+
+static int
+dump_ilog_entry_cb(void *cb_arg, struct ddb_ilog_entry *entry)
+{
+	struct ddb_ctx *ctx = cb_arg;
+
+	ddb_print_ilog_entry(ctx, entry);
+
+	return 0;
+}
+
+/**
+ * Dump the ilog entries of the object, dkey, or akey addressed by opt->path.
+ *
+ * The path is parsed and verified, echoed back, and its container is opened.
+ * The deepest part present in the path (akey, then dkey, then object) selects
+ * which ilog is read; each entry is printed through dump_ilog_entry_cb().
+ *
+ * @param ctx	ddb context; ctx->dc_poh is the pool handle used for the lookup
+ * @param opt	Command options; opt->path is required
+ * @return	-DER_INVAL if the path is missing or does not reach at least an
+ *		object, otherwise the result of path parsing, of opening the
+ *		container, or of reading the ilog entries
+ */
+int
+ddb_run_ilog_dump(struct ddb_ctx *ctx, struct ilog_dump_options *opt)
+{
+	struct dv_indexed_tree_path	itp = {0};
+	daos_handle_t			coh;
+	int				rc;
+
+	if (!opt->path) {
+		ddb_error(ctx, ilog_path_required_error_message);
+		return -DER_INVAL;
+	}
+
+	rc = init_path(ctx, opt->path, &itp);
+	if (!SUCCESS(rc))
+		return rc;
+	itp_print_full(ctx, &itp);
+	ddb_print(ctx, "\n");
+
+	if (!itp_has_cont(&itp)) {
+		ddb_error(ctx, ilog_path_required_error_message);
+		/* The path was initialized successfully, so release it like every other exit */
+		itp_free(&itp);
+		return -DER_INVAL;
+	}
+
+	rc = dv_cont_open(ctx->dc_poh, itp_cont(&itp), &coh);
+	if (!SUCCESS(rc)) {
+		itp_free(&itp);
+		return rc;
+	}
+
+	if (itp_has_akey(&itp)) {
+		rc = dv_get_key_ilog_entries(coh, *itp_oid(&itp), itp_dkey(&itp), itp_akey(&itp),
+					     dump_ilog_entry_cb, ctx);
+	} else if (itp_has_dkey(&itp)) {
+		rc = dv_get_key_ilog_entries(coh, *itp_oid(&itp), itp_dkey(&itp), NULL,
+					     dump_ilog_entry_cb, ctx);
+	} else if (itp_has_obj(&itp)) {
+		rc = dv_get_obj_ilog_entries(coh, *itp_oid(&itp), dump_ilog_entry_cb, ctx);
+	} else {
+		/* A container-only path does not identify an ilog */
+		ddb_error(ctx, ilog_path_required_error_message);
+		rc = -DER_INVAL;
+	}
+
+	dv_cont_close(&coh);
+	itp_free(&itp);
+
+	return rc;
+}
+
+struct dtx_cb_args {
+	struct ddb_ctx *ctx;
+	uint32_t entry_count;
+};
+
+static int
+active_dtx_cb(struct dv_dtx_active_entry *entry, void *cb_arg)
+{
+	struct dtx_cb_args *args = cb_arg;
+
+	ddb_print_dtx_active(args->ctx, entry);
+	args->entry_count++;
+
+	return 0;
+}
+
+static int
+committed_cb(struct dv_dtx_committed_entry *entry, void *cb_arg)
+{
+	struct dtx_cb_args *args = cb_arg;
+
+	ddb_print_dtx_committed(args->ctx, entry);
+	args->entry_count++;
+
+	return 0;
+}
+
+/**
+ * Dump the active and/or committed dtx tables of the container in opt->path.
+ *
+ * When neither or both of opt->active and opt->committed are set, both tables
+ * are dumped; otherwise only the selected one is. Each table is followed by a
+ * count of the entries that were printed. The container handle and the parsed
+ * path are released on every exit taken after the container was opened.
+ *
+ * @param ctx	ddb context; ctx->dc_poh is the pool handle used for the lookup
+ * @param opt	Command options (path plus the active/committed selectors)
+ * @return	0 on success, -DER_INVAL if the path has no container, otherwise
+ *		the error from path parsing, from opening the container, or from
+ *		reading a dtx table
+ */
+int
+ddb_run_dtx_dump(struct ddb_ctx *ctx, struct dtx_dump_options *opt)
+{
+	struct dv_indexed_tree_path	itp;
+	int				rc;
+	daos_handle_t			coh;
+	bool				both = !(opt->committed ^ opt->active);
+	struct dtx_cb_args		args = {.ctx = ctx, .entry_count = 0};
+
+	rc = init_path(ctx, opt->path, &itp);
+	if (!SUCCESS(rc))
+		return rc;
+
+	if (!itp_has_cont(&itp)) {
+		ddb_error(ctx, "Path to object is required.\n");
+		itp_free(&itp);
+		return -DER_INVAL;
+	}
+
+	rc = dv_cont_open(ctx->dc_poh, itp_cont(&itp), &coh);
+	if (!SUCCESS(rc)) {
+		itp_free(&itp);
+		return rc;
+	}
+
+	itp_print_full(ctx, &itp);
+	ddb_print(ctx, "\n");
+
+	if (both || opt->active) {
+		ddb_print(ctx, "Active Transactions:\n");
+		rc = dv_dtx_get_act_table(coh, active_dtx_cb, &args);
+		/* From here on the container is open, so failures must also close it */
+		if (!SUCCESS(rc))
+			D_GOTO(done, rc);
+		ddb_printf(ctx, "%d Active Entries\n", args.entry_count);
+	}
+	if (both || opt->committed) {
+		args.entry_count = 0;
+		ddb_print(ctx, "Committed Transactions:\n");
+		rc = dv_dtx_get_cmt_table(coh, committed_cb, &args);
+		if (!SUCCESS(rc))
+			D_GOTO(done, rc);
+		ddb_printf(ctx, "%d Committed Entries\n", args.entry_count);
+	}
+
+	rc = 0;
+done:
+	dv_cont_close(&coh);
+	itp_free(&itp);
+
+	return rc;
+}
+
+int
+ddb_run_rm(struct ddb_ctx *ctx, struct rm_options *opt)
+{
+	struct dv_indexed_tree_path itp;
+	struct dv_tree_path vtp;
+	int rc;
+
+	if (!ctx->dc_write_mode) {
+		ddb_error(ctx, error_msg_write_mode_only);
+		return -DER_INVAL;
+	}
+
+	rc = init_path(ctx, opt->path, &itp);
+
+	if (!SUCCESS(rc))
+		return rc;
+	itp_to_vos_path(&itp, &vtp);
+
+	rc = dv_delete(ctx->dc_poh, &vtp);
+
+	if (!SUCCESS(rc)) {
+		ddb_errorf(ctx, "Error: "DF_RC"\n", DP_RC(rc));
+		itp_free(&itp);
+
+		return rc;
+	}
+
+	itp_print_full(ctx, &itp);
+	ddb_print(ctx, " deleted\n");
+
+	itp_free(&itp);
+
+	return 0;
+}
+
+/**
+ * Load the contents of the file opt->src into the value at VOS path opt->dst.
+ *
+ * Requires write mode. The destination path does not have to exist yet as long
+ * as its container part is complete. The source file is read in full through
+ * the ctx->dc_io_ft callbacks and written with dv_update(). All exits taken
+ * after the path has been parsed go through the shared cleanup.
+ *
+ * @param ctx	ddb context (pool handle, write-mode flag, and I/O callbacks)
+ * @param opt	Command options; opt->src is the file, opt->dst the VOS path
+ * @return	0 on success; -DER_INVAL when not in write mode, when the source
+ *		file is not accessible, or when its size is 0;
+ *		-DDBER_INVALID_CONT when the path has no complete container;
+ *		-DER_UNKNOWN when the byte count read does not match the file
+ *		size; otherwise the error from allocation, read, or update
+ */
+int
+ddb_run_value_load(struct ddb_ctx *ctx, struct value_load_options *opt)
+{
+	struct dv_indexed_tree_path	itp = {0};
+	struct dv_tree_path		vtp = {0};
+	d_iov_t				iov = {0};
+	size_t				file_size;
+	int				rc;
+
+	if (!ctx->dc_write_mode) {
+		ddb_error(ctx, error_msg_write_mode_only);
+		return -DER_INVAL;
+	}
+
+	rc = init_path(ctx, opt->dst, &itp);
+
+	if (!SUCCESS(rc)) {
+		/* It's okay that the path doesn't exist as long as the container does */
+		if (itp_has_cont_complete(&itp)) {
+			rc = 0;
+		} else {
+			D_ERROR("Must at least have a valid container\n");
+			/*
+			 * itp is still inspected after a failed init_path(), so it may
+			 * hold parsed state; use the shared cleanup instead of returning.
+			 */
+			D_GOTO(done, rc = -DDBER_INVALID_CONT);
+		}
+	}
+
+	itp_print_full(ctx, &itp);
+	ddb_print(ctx, "\n");
+
+	if (!ctx->dc_io_ft.ddb_get_file_exists(opt->src)) {
+		ddb_errorf(ctx, "Unable to access '%s'\n", opt->src);
+		D_GOTO(done, rc = -DER_INVAL);
+	}
+
+	file_size = ctx->dc_io_ft.ddb_get_file_size(opt->src);
+	if (file_size == 0) {
+		/* Tell the user why nothing was loaded instead of failing silently */
+		ddb_errorf(ctx, "Unable to load '%s': file size is 0\n", opt->src);
+		D_GOTO(done, rc = -DER_INVAL);
+	}
+	rc = daos_iov_alloc(&iov, file_size, false);
+	if (!SUCCESS(rc)) {
+		ddb_errorf(ctx, "System error: "DF_RC"\n", DP_RC(rc));
+		D_GOTO(done, rc);
+	}
+
+	/* On success rc temporarily holds the number of bytes read */
+	rc = (int)ctx->dc_io_ft.ddb_read_file(opt->src, &iov);
+	if (rc < 0) {
+		ddb_errorf(ctx, "System error: "DF_RC"\n", DP_RC(rc));
+		D_GOTO(done, rc);
+	} else if (!(rc == iov.iov_buf_len && rc == iov.iov_len)) {
+		D_ERROR("Bytes read from file does not match results from get file size\n");
+		D_GOTO(done, rc = -DER_UNKNOWN);
+	}
+
+	itp_to_vos_path(&itp, &vtp);
+	rc = dv_update(ctx->dc_poh, &vtp, &iov);
+	if (!SUCCESS(rc)) {
+		ddb_errorf(ctx, "Unable to update path: "DF_RC"\n", DP_RC(rc));
+		D_GOTO(done, rc);
+	}
+
+done:
+	daos_iov_free(&iov);
+	itp_free(&itp);
+
+	if (SUCCESS(rc))
+		ddb_printf(ctx, "Successfully loaded file '%s'\n", opt->src);
+
+	return rc;
+}
+
+static int
+process_ilog_op(struct ddb_ctx
*ctx, char *path, enum ddb_ilog_op op) +{ + struct dv_indexed_tree_path itp = {0}; + daos_handle_t coh = {0}; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + if (path == NULL) { + ddb_error(ctx, ilog_path_required_error_message); + return -DER_INVAL; + } + + rc = init_path(ctx, path, &itp); + + if (!SUCCESS(rc)) + return rc; + itp_print_full(ctx, &itp); + ddb_print(ctx, "\n"); + + if (!itp_has_cont(&itp)) { + ddb_error(ctx, ilog_path_required_error_message); + return -DER_INVAL; + } + + rc = dv_cont_open(ctx->dc_poh, itp_cont(&itp), &coh); + if (!SUCCESS(rc)) { + itp_free(&itp); + return rc; + } + + if (itp_has_akey(&itp)) { + rc = dv_process_key_ilog_entries(coh, *itp_oid(&itp), itp_dkey(&itp), + itp_akey(&itp), op); + } else if (itp_has_dkey(&itp)) { + rc = dv_process_key_ilog_entries(coh, *itp_oid(&itp), itp_dkey(&itp), NULL, op); + } else if (itp_has_obj(&itp)) { + rc = dv_process_obj_ilog_entries(coh, *itp_oid(&itp), op); + } else { + ddb_error(ctx, ilog_path_required_error_message); + rc = -DER_INVAL; + } + + dv_cont_close(&coh); + itp_free(&itp); + + if (SUCCESS(rc)) + ddb_print(ctx, "Done\n"); + else + ddb_errorf(ctx, "Failed to %s ilogs: "DF_RC"\n", + op == DDB_ILOG_OP_ABORT ? 
"abort" : "persist", DP_RC(rc)); + return rc; +} + +int +ddb_run_ilog_clear(struct ddb_ctx *ctx, struct ilog_clear_options *opt) +{ + return process_ilog_op(ctx, opt->path, DDB_ILOG_OP_ABORT); +} + +int +ddb_run_ilog_commit(struct ddb_ctx *ctx, struct ilog_commit_options *opt) +{ + return process_ilog_op(ctx, opt->path, DDB_ILOG_OP_PERSIST); +} + +int +ddb_run_dtx_cmt_clear(struct ddb_ctx *ctx, struct dtx_cmt_clear_options *opt) +{ + struct dv_indexed_tree_path itp = {0}; + daos_handle_t coh = {0}; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + if (opt->path == NULL) { + ddb_error(ctx, "path is required\n"); + return -DER_INVAL; + } + + rc = init_path(ctx, opt->path, &itp); + if (!SUCCESS(rc)) + D_GOTO(done, rc); + itp_print_full(ctx, &itp); + ddb_print(ctx, "\n"); + + if (!itp_has_cont(&itp)) + D_GOTO(done, rc = -DER_INVAL); + + rc = dv_cont_open(ctx->dc_poh, itp_cont(&itp), &coh); + if (!SUCCESS(rc)) + D_GOTO(done, rc); + + rc = dv_dtx_clear_cmt_table(coh); + if (rc < 0) + D_GOTO(done, rc); + + ddb_printf(ctx, "Cleared %d dtx committed entries\n", rc); + rc = 0; + +done: + itp_free(&itp); + dv_cont_close(&coh); + return rc; +} + +static int +sync_smd_cb(void *cb_args, uuid_t pool_id, uint32_t vos_id, uint64_t blob_id, + daos_size_t blob_size, uuid_t dev_id) +{ + struct ddb_ctx *ctx = cb_args; + + ddb_printf(ctx, "> Sync Info - pool: "DF_UUIDF", target id: %d, blob id: %lu, " + "blob_size: %lu\n", DP_UUID(pool_id), + vos_id, blob_id, blob_size); + ddb_printf(ctx, "> Sync Info - dev: "DF_UUIDF", target id: %d\n", DP_UUID(dev_id), vos_id); + + return 0; +} + +int +ddb_run_smd_sync(struct ddb_ctx *ctx, struct smd_sync_options *opt) +{ + /* Some defaults */ + char nvme_conf[256] = "/mnt/daos/daos_nvme.conf"; + char db_path[256] = "/mnt/daos"; + int rc; + + if (daos_handle_is_valid(ctx->dc_poh)) { + ddb_print(ctx, "Close pool connection before attempting to sync smd\n"); + return -DER_INVAL; + } + + if 
(opt->nvme_conf != NULL && strlen(opt->nvme_conf) > 0) + strncpy(nvme_conf, opt->nvme_conf, ARRAY_SIZE(nvme_conf) - 1); + if (opt->db_path != NULL && strlen(opt->db_path) > 0) + strncpy(db_path, opt->db_path, ARRAY_SIZE(db_path) - 1); + + ddb_printf(ctx, "Using nvme config file: '%s' and smd db path: '%s'\n", nvme_conf, db_path); + rc = dv_sync_smd(nvme_conf, db_path, sync_smd_cb, ctx); + ddb_printf(ctx, "Done: "DF_RC"\n", DP_RC(rc)); + return rc; +} + +struct dump_vea_cb_args { + struct ddb_ctx *dva_ctx; + uint32_t dva_count; +}; + +static int +dump_vea_cb(void *cb_arg, struct vea_free_extent *vfe) +{ + struct dump_vea_cb_args *args = cb_arg; + + ddb_printf(args->dva_ctx, "[Region %d] offset: %lu, block count: %d, age: %d\n", + args->dva_count, + vfe->vfe_blk_off, + vfe->vfe_blk_cnt, + vfe->vfe_age); + + args->dva_count++; + return 0; +} + +int +ddb_run_vea_dump(struct ddb_ctx *ctx) +{ + struct dump_vea_cb_args args = {.dva_ctx = ctx, .dva_count = 0}; + int rc; + + rc = dv_enumerate_vea(ctx->dc_poh, dump_vea_cb, &args); + + ddb_printf(ctx, "Total Free Regions: %d\n", args.dva_count); + + return rc; +} + +static int +parse_uint32_t(char *str) +{ + uint32_t result = atoi(str); + char verify_str[32]; + + snprintf(verify_str, ARRAY_SIZE(verify_str), "%d", result); + + if (strcmp(str, verify_str) == 0) + return result; + + return -DER_INVAL; + +} + +struct update_vea_verify_region_cb_args { + struct ddb_ctx *ctx; + struct vea_free_extent potential_extent; +}; + +/** + * + * @param n - new extent to insert or update + * @param e - existing extent + * @return + */ +static bool +vfe_overlap(struct vea_free_extent *n, struct vea_free_extent *e) +{ + uint64_t a_lo = n->vfe_blk_off; + uint64_t a_hi = n->vfe_blk_off + n->vfe_blk_cnt - 1; + uint64_t b_lo = e->vfe_blk_off; + uint64_t b_hi = e->vfe_blk_off + e->vfe_blk_cnt - 1; + + return !(a_hi < b_lo || a_lo > b_hi); +} + +static int +update_vea_verify_region_cb(void *cb_arg, struct vea_free_extent *vfe) +{ + struct 
update_vea_verify_region_cb_args *args = cb_arg; + + if (vfe_overlap(vfe, &args->potential_extent)) { + ddb_errorf(args->ctx, "New free region {%lu, %d} overlaps with {%lu, %d}\n", + args->potential_extent.vfe_blk_off, + args->potential_extent.vfe_blk_cnt, + vfe->vfe_blk_off, vfe->vfe_blk_cnt); + return -DER_INVAL; + } + + return 0; +} + +static int +verify_free(struct ddb_ctx *ctx, uint64_t offset, uint32_t blk_cnt) +{ + struct update_vea_verify_region_cb_args args = {0}; + + args.potential_extent.vfe_blk_off = offset; + args.potential_extent.vfe_blk_cnt = blk_cnt; + args.ctx = ctx; + return dv_enumerate_vea(ctx->dc_poh, update_vea_verify_region_cb, &args); +} + +int +ddb_run_vea_update(struct ddb_ctx *ctx, struct vea_update_options *opt) +{ + uint64_t offset; + uint32_t blk_cnt; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + offset = parse_uint32_t(opt->offset); + if (offset <= 0) { + ddb_errorf(ctx, "'%s' is not a valid offset\n", opt->offset); + return -DER_INVAL; + } + blk_cnt = parse_uint32_t(opt->blk_cnt); + if (blk_cnt <= 0) { + ddb_errorf(ctx, "'%s' is not a valid block size\n", opt->blk_cnt); + return -DER_INVAL; + } + + rc = verify_free(ctx, offset, blk_cnt); + if (!SUCCESS(rc)) + return rc; + + ddb_printf(ctx, "Adding free region to vea {%lu, %d}\n", offset, blk_cnt); + rc = dv_vea_free_region(ctx->dc_poh, offset, blk_cnt); + if (!SUCCESS(rc)) + ddb_errorf(ctx, "Unable to add new free region: "DF_RC"\n", DP_RC(rc)); + + return rc; +} + +/* Information used while modifying a dtx active entry */ +struct dtx_modify_args { + struct dv_indexed_tree_path itp; + struct dtx_id dti; + daos_handle_t coh; +}; + +/* setup information needed for calling commit or abort active dtx entry */ +static int +dtx_modify_init(struct ddb_ctx *ctx, char *path, char *dtx_id_str, struct dtx_modify_args *args) +{ + int rc; + struct dv_indexed_tree_path *itp = &args->itp; + + rc = init_path(ctx, path, itp); + if 
(!SUCCESS(rc)) + D_GOTO(error, rc); + + itp_print_full(ctx, itp); + ddb_print(ctx, "\n"); + + if (!itp_has_cont(itp)) { + ddb_error(ctx, "Path to container is required\n"); + D_GOTO(error, rc = -DER_INVAL); + } + + rc = dv_cont_open(ctx->dc_poh, itp_cont(itp), &args->coh); + if (!SUCCESS(rc)) { + ddb_errorf(ctx, "Unable to open container: "DF_RC"\n", DP_RC(rc)); + D_GOTO(error, rc); + } + + rc = ddb_parse_dtx_id(dtx_id_str, &args->dti); + if (!SUCCESS(rc)) { + ddb_errorf(ctx, "Invalid dtx_id: %s\n", dtx_id_str); + D_GOTO(error, rc); + } + return 0; + +error: + dv_cont_close(&args->coh); + itp_free(itp); + return rc; +} + +static void +dtx_modify_fini(struct dtx_modify_args *args) +{ + dv_cont_close(&args->coh); + itp_free(&args->itp); +} + +int +ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_commit_options *opt) +{ + struct dtx_modify_args args = {0}; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + rc = dtx_modify_init(ctx, opt->path, opt->dtx_id, &args); + if (!SUCCESS(rc)) + return rc; + /* Marking entries as committed returns the number of entries committed */ + rc = dv_dtx_commit_active_entry(args.coh, &args.dti); + if (rc < 0) { + ddb_errorf(ctx, "Error marking entry as committed: "DF_RC"\n", DP_RC(rc)); + } else if (rc > 0) { + ddb_print(ctx, "Entry marked as committed\n"); + rc = 0; + } else { + ddb_print(ctx, "No entry found to mark as committed\n"); + } + + dtx_modify_fini(&args); + + return rc; +} + +int ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_abort_options *opt) +{ + struct dtx_modify_args args = {0}; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + rc = dtx_modify_init(ctx, opt->path, opt->dtx_id, &args); + if (!SUCCESS(rc)) + return rc; + + rc = dv_dtx_abort_active_entry(args.coh, &args.dti); + if (SUCCESS(rc)) { + ddb_print(ctx, "Entry marked as aborted\n"); + } else if (rc == -DER_NONEXIST) 
{ + ddb_print(ctx, "No entry found to mark as aborted\n"); + rc = 0; + } else { + ddb_errorf(ctx, "Error marking entry as aborted: "DF_RC"\n", DP_RC(rc)); + } + + dtx_modify_fini(&args); + return rc; +} diff --git a/src/ddb/ddb_common.h b/src/ddb/ddb_common.h new file mode 100644 index 00000000000..7f281ebd852 --- /dev/null +++ b/src/ddb/ddb_common.h @@ -0,0 +1,51 @@ +/** + * (C) Copyright 2019-2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DAOS_DDB_COMMON_H +#define __DAOS_DDB_COMMON_H + +#include +#include +#include +#include +#include "ddb.h" +#define COMMAND_NAME_MAX 64 + +#define SUCCESS(rc) ((rc) == DER_SUCCESS) + +#define ddb_print(ctx, str) \ + do { if ((ctx)->dc_io_ft.ddb_print_message) \ + (ctx)->dc_io_ft.ddb_print_message(str); \ + else \ + printf(str); } while (0) + +#define ddb_printf(ctx, fmt, ...) \ + do { if ((ctx)->dc_io_ft.ddb_print_message) \ + (ctx)->dc_io_ft.ddb_print_message(fmt, __VA_ARGS__); \ + else \ + printf(fmt, __VA_ARGS__); \ + } while (0) + +#define ddb_error(ctx, str) \ + do { if (ctx->dc_io_ft.ddb_print_error) \ + ctx->dc_io_ft.ddb_print_error(str); \ + else \ + printf(str); } while (0) + +#define ddb_errorf(ctx, fmt, ...) \ + do { if ((ctx)->dc_io_ft.ddb_print_error) \ + (ctx)->dc_io_ft.ddb_print_error(fmt, __VA_ARGS__); \ + else \ + printf(fmt, __VA_ARGS__); \ + } while (0) + +struct argv_parsed { + char **ap_argv; + void *ap_ctx; + uint32_t ap_argc; +}; + +#endif /* __DAOS_DDB_COMMON_H */ diff --git a/src/ddb/ddb_entry.c b/src/ddb/ddb_entry.c new file mode 100644 index 00000000000..92b816ae859 --- /dev/null +++ b/src/ddb/ddb_entry.c @@ -0,0 +1,30 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#include
+#include
+#include "ddb_main.h"
+
+/*
+ * Program entry point: install the default I/O callbacks, initialize ddb and hand the
+ * command line to ddb_main(). The negated return code is used as the exit status.
+ */
+int main(int argc, char *argv[])
+{
+	struct ddb_ctx ctx;
+	int rc;
+
+	ddb_ctx_init(&ctx);
+
+	rc = ddb_init();
+	if (rc != 0) {
+		fprintf(stderr, "Error with ddb_init: "DF_RC"\n", DP_RC(rc));
+		return -rc;
+	}
+	rc = ddb_main(&ctx.dc_io_ft, argc, argv);
+	if (rc != 0)
+		fprintf(stderr, "Error: "DF_RC"\n", DP_RC(rc));
+
+	ddb_fini();
+
+	return -rc;
+}
diff --git a/src/ddb/ddb_main.c b/src/ddb/ddb_main.c
new file mode 100644
index 00000000000..59231bfd759
--- /dev/null
+++ b/src/ddb/ddb_main.c
@@ -0,0 +1,268 @@
+/**
+ * (C) Copyright 2022 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#include
+#include
+#include "ddb_main.h"
+#include "ddb_common.h"
+#include "ddb_parse.h"
+#include "ddb_vos.h"
+#include "ddb.h"
+#include
+#include
+
+/* Initialize the debug logging used by ddb. Returns the daos_debug_init() result. */
+int
+ddb_init()
+{
+	int rc = daos_debug_init(DAOS_LOG_DEFAULT);
+
+	return rc;
+}
+
+/* Counterpart of ddb_init(). */
+void
+ddb_fini()
+{
+	daos_debug_fini();
+}
+
+/* Default implementations */
+
+/* Read one line from stdin; returns buf, or NULL on end of input or error (fgets). */
+static char *
+get_input(char *buf, uint32_t buf_len)
+{
+	return fgets(buf, buf_len, stdin);
+}
+
+/* printf-style output to stderr. Returns what vfprintf() returns. */
+static int
+print_error(const char *fmt, ...)
+{
+	va_list args;
+	int rc;
+
+	va_start(args, fmt);
+	rc = vfprintf(stderr, fmt, args);
+	va_end(args);
+
+	return rc;
+}
+
+/*
+ * Write the used part of contents (iov_len bytes) to dst_path, replacing the file.
+ * Returns 0 on success, or an error if the file can't be opened, written or flushed.
+ */
+static int
+write_file(const char *dst_path, d_iov_t *contents)
+{
+	FILE *f;
+	int rc;
+
+	f = fopen(dst_path, "w");
+	if (f == NULL) {
+		rc = daos_errno2der(errno);
+		print_error("Unable to open path '%s': "DF_RC"\n", dst_path, DP_RC(rc));
+		return rc;
+	}
+
+	rc = 0;
+	if (fwrite(contents->iov_buf, 1, contents->iov_len, f) != contents->iov_len) {
+		rc = -DER_IO;
+		print_error("Unable to write to '%s': "DF_RC"\n", dst_path, DP_RC(rc));
+	}
+
+	/* fclose() flushes buffered data, so a failure there is a failed write as well */
+	if (fclose(f) != 0 && rc == 0) {
+		rc = daos_errno2der(errno);
+		print_error("Unable to write to '%s': "DF_RC"\n", dst_path, DP_RC(rc));
+	}
+
+	return rc;
+}
+
+/*
+ * Size of the file at path in bytes, or 0 if the file can't be inspected. The return
+ * type is unsigned so a negative error code can't be reported through it.
+ */
+static size_t
+get_file_size(const char *path)
+{
+	struct stat st;
+
+	if (stat(path, &st) == 0)
+		return st.st_size;
+
+	return 0;
+}
+
+/*
+ * Read up to contents->iov_buf_len bytes of the file at path into contents->iov_buf and
+ * set contents->iov_len to the number of bytes read, which is also returned.
+ *
+ * NOTE: if the file can't be opened, the negative error code is returned through the
+ * unsigned return type; the caller in ddb_run_value_load casts the result back to int.
+ */
+static size_t
+read_file(const char *path, d_iov_t *contents)
+{
+	FILE *f;
+	int rc;
+	size_t result;
+
+	f = fopen(path, "r");
+	if (f == NULL) {
+		rc = daos_errno2der(errno);
+		print_error("Unable to open path '%s': "DF_RC"\n", path, DP_RC(rc));
+		return rc;
+	}
+
+	result = fread(contents->iov_buf, 1, contents->iov_buf_len, f);
+
+	fclose(f);
+
+	contents->iov_len = result;
+
+	return result;
+}
+
+/* True if path exists (access() with F_OK). */
+static bool
+file_exists(const char *path)
+{
+	return access(path, F_OK) == 0;
+}
+
+/*
+ * Call line_cb once for every line of the file at path, stopping at the first line for
+ * which the callback fails.
+ *
+ * Returns 0 if every line was processed, the callback's error if it failed, or an
+ * error if the file can't be opened or read.
+ */
+static int
+get_lines(const char *path, ddb_io_line_cb line_cb, void *cb_args)
+{
+	FILE *f;
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t read;
+	int rc = 0;
+
+	f = fopen(path, "r");
+	if (f == NULL) {
+		rc = daos_errno2der(errno);
+		print_error("Unable to open path '%s': "DF_RC"\n", path, DP_RC(rc));
+		return rc;
+	}
+
+	for (;;) {
+		read = getline(&line, &len, f);
+		if (read == -1) {
+			/*
+			 * -1 means either end of file or a read error. Only the latter
+			 * is a failure, and errno is only meaningful in that case.
+			 */
+			if (ferror(f)) {
+				rc = daos_errno2der(errno);
+				print_error("Error reading line from file '%s': "DF_RC"\n",
+					    path, DP_RC(rc));
+			}
+			break;
+		}
+
+		rc = line_cb(cb_args, line, read);
+		if (!SUCCESS(rc)) {
+			/* keep rc so the caller learns that a line failed */
+			print_error("Issue with line '%s': "DF_RC"\n", line, DP_RC(rc));
+			break;
+		}
+	}
+
+	fclose(f);
+	if (line)
+		D_FREE(line);
+
+	return rc;
+}
+
+/* Zero the context and install the default (stdio/file based) I/O callbacks. */
+void
+ddb_ctx_init(struct ddb_ctx *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->dc_io_ft.ddb_print_message = printf;
+	ctx->dc_io_ft.ddb_print_error = print_error;
+	ctx->dc_io_ft.ddb_get_input = get_input;
+	ctx->dc_io_ft.ddb_write_file = write_file;
+	ctx->dc_io_ft.ddb_read_file = read_file;
+	ctx->dc_io_ft.ddb_get_file_size = get_file_size;
+	ctx->dc_io_ft.ddb_get_file_exists = file_exists;
+	ctx->dc_io_ft.ddb_get_lines = get_lines;
+}
+
+/* True if the first str_len characters of str are all whitespace (or str_len is 0). */
+static bool
+all_whitespace(const char *str, uint32_t str_len)
+{
+	uint32_t i;
+
+	for (i = 0; i < str_len; i++) {
+		/* <ctype.h> functions require a value representable as unsigned char */
+		if (!isspace((unsigned char)str[i]))
+			return false;
+	}
+	return true;
+}
+
+/* Command file callback: echo the line and run it as a command unless it is blank. */
+static int
+process_line_cb(void *cb_args, char *line, uint32_t line_len)
+{
+	struct ddb_ctx *ctx = cb_args;
+
+	ddb_printf(ctx, "Command: %s", line);
+	/* ignore empty lines */
+	if (all_whitespace(line, line_len))
+		return 0;
+	return ddb_run_cmd(ctx, line, ctx->dc_write_mode);
+}
+
+#define str_has_value(str) ((str) != NULL && strlen(str) > 0)
+
+/*
+ * Run ddb with the given I/O callbacks and command line. Depending on the program
+ * arguments this prints the help, runs a single command (-R), runs every line of a
+ * command file (-f), or reads commands interactively until the context asks to quit
+ * or the input ends. An opened pool is closed before returning.
+ */
+int
+ddb_main(struct ddb_io_ft *io_ft, int argc, char *argv[])
+{
+	struct program_args pa = {0};
+	uint32_t input_buf_len = 1024;
+	char *input_buf;
+	int rc;
+	struct ddb_ctx ctx = {0};
+
+	D_ASSERT(io_ft);
+	ctx.dc_io_ft = *io_ft;
+
+	D_ALLOC(input_buf, input_buf_len);
+	if (input_buf == NULL)
+		return -DER_NOMEM;
+
+	rc = ddb_parse_program_args(&ctx, argc, argv, &pa);
+	if (!SUCCESS(rc))
+		D_GOTO(done, rc);
+
+	if (pa.pa_get_help) {
+		ddb_program_help(&ctx);
+		D_GOTO(done, rc);
+	}
+
+	ctx.dc_write_mode = pa.pa_write_mode;
+
+	if (str_has_value(pa.pa_r_cmd_run) && str_has_value(pa.pa_cmd_file)) {
+		ddb_print(&ctx, "Cannot use both '-R' and '-f'.\n");
+		D_GOTO(done, rc = -DER_INVAL);
+	}
+
+	if (str_has_value(pa.pa_pool_path)) {
+		rc = dv_pool_open(pa.pa_pool_path, &ctx.dc_poh);
+		if (!SUCCESS(rc))
+			D_GOTO(done, rc);
+	}
+
+	if (str_has_value(pa.pa_r_cmd_run)) {
+		rc = ddb_run_cmd(&ctx, pa.pa_r_cmd_run, pa.pa_write_mode);
+		if (!SUCCESS(rc))
+			/* log the command that was run; input_buf is unused on this path */
+			D_ERROR("Command '%s' failed: "DF_RC"\n", pa.pa_r_cmd_run, DP_RC(rc));
+		D_GOTO(done, rc);
+	}
+
+	if (str_has_value(pa.pa_cmd_file)) {
+		if (!io_ft->ddb_get_file_exists(pa.pa_cmd_file)) {
+			ddb_errorf(&ctx, "Unable to access file: '%s'\n", pa.pa_cmd_file);
+			D_GOTO(done, rc = -DER_INVAL);
+		}
+
+		rc = io_ft->ddb_get_lines(pa.pa_cmd_file, process_line_cb, &ctx);
+		D_GOTO(done, rc);
+	}
+
+	while (!ctx.dc_should_quit) {
+		size_t input_len;
+
+		io_ft->ddb_print_message("$ ");
+		if (io_ft->ddb_get_input(input_buf, input_buf_len) == NULL) {
+			/*
+			 * No more input (end of input or read error). Stop here rather
+			 * than re-running the stale buffer contents forever.
+			 */
+			rc = 0;
+			break;
+		}
+
+		/* Remove newline (the buffer may be empty, so check the length first) */
+		input_len = strlen(input_buf);
+		if (input_len > 0 && input_buf[input_len - 1] == '\n')
+			input_buf[input_len - 1] = '\0';
+
+		rc = ddb_run_cmd(&ctx, input_buf, pa.pa_write_mode);
+		if (!SUCCESS(rc)) {
+			D_ERROR("Command '%s' failed: "DF_RC"\n", input_buf, DP_RC(rc));
+			ddb_printf(&ctx, "Command '%s' failed: "DF_RC"\n", input_buf, DP_RC(rc));
+		}
+	}
+
+done:
+	if (daos_handle_is_valid(ctx.dc_poh)) {
+		int tmp_rc = dv_pool_close(ctx.dc_poh);
+
+		/* don't let a close failure hide an earlier error */
+		if (rc == 0)
+			rc = tmp_rc;
+	}
+	D_FREE(input_buf);
+
+	return rc;
+}
diff --git a/src/ddb/ddb_main.h b/src/ddb/ddb_main.h
new file mode 100644
index 00000000000..eee6d0dbc87
--- /dev/null
+++ b/src/ddb/ddb_main.h
@@ -0,0 +1,15 @@
+/**
+ * (C) Copyright 2022 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#ifndef DAOS_DDB_MAIN_H
+#define DAOS_DDB_MAIN_H
+
+#include
+#include "ddb_common.h"
+
+int ddb_main(struct ddb_io_ft *io_ft, int argc, char *argv[]);
+
+#endif /* DAOS_DDB_MAIN_H */
diff --git a/src/ddb/ddb_parse.c b/src/ddb/ddb_parse.c
new file mode 100644
index 00000000000..db4f42ae660
--- /dev/null
+++ b/src/ddb/ddb_parse.c
@@ -0,0 +1,469 @@
+/**
+ * (C) Copyright 2019-2022 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#include
+#include
+#include
+#include "ddb_common.h"
+#include "ddb_parse.h"
+
+/*
+ * Split a path to a VOS file into its parts: every component before the pool uuid is
+ * appended to vf_db_path, the uuid is parsed into vf_pool_uuid, the component after
+ * the uuid is the vos file name, and the number after the '-' in that name is the
+ * target index.
+ *
+ * vf_db_path is appended to, so vos_file_parts is expected to be zeroed by the caller.
+ * On any failure vos_file_parts is reset to all zeros.
+ *
+ * Returns 0 on success, -DER_INVAL if the path is incomplete or its db path part does
+ * not fit into vf_db_path, -DER_NOMEM on allocation failure.
+ */
+int
+vos_path_parse(const char *path, struct vos_file_parts *vos_file_parts)
+{
+	uint32_t path_len;
+	size_t db_path_len;
+	char *path_copy;
+	char *tok;
+	int rc = -DER_INVAL;
+
+	/* check the arguments before the first use of path */
+	D_ASSERT(path != NULL && vos_file_parts != NULL);
+
+	path_len = strlen(path) + 1;
+	D_ALLOC(path_copy, path_len);
+	if (path_copy == NULL)
+		return -DER_NOMEM;
+	strcpy(path_copy, path);
+
+	tok = strtok(path_copy, "/");
+	while (tok != NULL && rc != 0) {
+		rc = uuid_parse(tok, vos_file_parts->vf_pool_uuid);
+		if (!SUCCESS(rc)) {
+			/* +2: one for the '/' separator and one for the terminator */
+			db_path_len = strlen(vos_file_parts->vf_db_path);
+			if (db_path_len + strlen(tok) + 2 > sizeof(vos_file_parts->vf_db_path)) {
+				D_ERROR("Path is too long: %s\n", path);
+				D_GOTO(done, rc = -DER_INVAL);
+			}
+			strcat(vos_file_parts->vf_db_path, "/");
+			strcat(vos_file_parts->vf_db_path, tok);
+		}
+		tok = strtok(NULL, "/");
+	}
+
+	if (rc != 0 || tok == NULL) {
+		D_ERROR("Incomplete path: %s\n", path);
+		D_GOTO(done, rc = -DER_INVAL);
+	}
+
+	strncpy(vos_file_parts->vf_vos_file, tok, ARRAY_SIZE(vos_file_parts->vf_vos_file) - 1);
+	/* strncpy does not terminate the destination when the source is too long */
+	vos_file_parts->vf_vos_file[ARRAY_SIZE(vos_file_parts->vf_vos_file) - 1] = '\0';
+
+	/*
+	 * file name should be vos-N ... split on "-"
+	 * If not, might be test, just assume target of 0
+	 */
+	strtok(tok, "-");
+	tok = strtok(NULL, "-");
+	if (tok != NULL) {
+		vos_file_parts->vf_target_idx = atoi(tok);
+	} else {
+		D_WARN("vos file name not in correct format: %s\n", vos_file_parts->vf_vos_file);
+		vos_file_parts->vf_target_idx = 0;
+	}
+
+done:
+	if (!SUCCESS(rc)) {
+		/* Reset to if not valid */
+		memset(vos_file_parts, 0, sizeof(*vos_file_parts));
+	}
+	D_FREE(path_copy);
+	return rc;
+}
+
+
+/*
+ * Split buf into words with wordexp() and expose them as argc/argv in parse_args.
+ * On success the result must be released with ddb_str2argv_free().
+ *
+ * Returns 0 on success, -DER_NOMEM on allocation failure, -DER_INVAL if wordexp() fails.
+ */
+int
+ddb_str2argv_create(const char *buf, struct argv_parsed *parse_args)
+{
+	wordexp_t *we;
+	int rc;
+
+	D_ALLOC_PTR(we);
+	if (we == NULL)
+		return -DER_NOMEM;
+
+	rc = wordexp(buf, we, WRDE_SHOWERR | WRDE_UNDEF);
+	if (rc != 0) {
+		D_FREE(we);
+		return -DER_INVAL;
+	}
+
+	parse_args->ap_argc = we->we_wordc;
+	parse_args->ap_argv = we->we_wordv;
+	/* keep the wordexp_t so ddb_str2argv_free() can release it */
+	parse_args->ap_ctx = we;
+
+	return rc;
+}
+
+/* Release the words and the wordexp_t created by ddb_str2argv_create(). */
+void
+ddb_str2argv_free(struct argv_parsed *parse_args)
+{
+	wordfree(parse_args->ap_ctx);
+	D_FREE(parse_args->ap_ctx);
+}
+
+/*
+ * Parse the program's command line into pa: -w/--write_mode, -R/--run_cmd <cmd>,
+ * -f/--cmd_file <file>, -h/--help, followed by at most one positional argument which
+ * is the pool path.
+ *
+ * Returns 0 on success, -DER_INVAL on an unknown option or too many arguments.
+ */
+int
+ddb_parse_program_args(struct ddb_ctx *ctx, uint32_t argc, char **argv, struct program_args *pa)
+{
+	struct option program_options[] = {
+		{ "write_mode", no_argument, NULL, 'w' },
+		{ "run_cmd", required_argument, NULL, 'R' },
+		{ "cmd_file", required_argument, NULL, 'f' },
+		/* takes no argument, same as the short option "h" below */
+		{ "help", no_argument, NULL, 'h' },
+		{ NULL }
+	};
+	int index = 0, opt;
+
+	optind = 0; /* Reinitialize getopt */
+	opterr = 0;
+	while ((opt = getopt_long(argc, argv, "wR:f:h", program_options, &index)) != -1) {
+		switch (opt) {
+		case 'w':
+			pa->pa_write_mode = true;
+			break;
+		case 'R':
+			pa->pa_r_cmd_run = optarg;
+			break;
+		case 'f':
+			pa->pa_cmd_file = optarg;
+			break;
+		case 'h':
+			pa->pa_get_help = true;
+			break;
+		case '?':
+			ddb_errorf(ctx, "'%c'(0x%x) is unknown\n", optopt, optopt);
+			/* fallthrough */
+		default:
+			return -DER_INVAL;
+		}
+	}
+
+	if (argc - optind > 1) {
+		ddb_error(ctx, "Too many commands\n");
+		return -DER_INVAL;
+	}
+	if (argc - optind == 1)
+		pa->pa_pool_path = argv[optind];
+
+	return 0;
+}
+
+/*
+ * Parse a dtx id of the form "<uuid>.<hlc in hex>" into dtx_id. The result is printed
+ * back with DF_DTIF and has to match the input exactly, otherwise the input is rejected.
+ *
+ * Returns DER_SUCCESS, or -DER_INVAL if the input is missing or malformed.
+ */
+int
+ddb_parse_dtx_id(const char *dtx_id_str, struct dtx_id *dtx_id)
+{
+	char cpy[128] = {0};
+	char validate_buf[128] = {0};
+	char *tok;
+
+	if (dtx_id_str == NULL)
+		return -DER_INVAL;
+
+	strncpy(cpy, dtx_id_str, sizeof(cpy) - 1);
+
+	tok = strtok(cpy, ".");
+	if (tok == NULL)
+		return -DER_INVAL;
+	if (uuid_parse(tok, dtx_id->dti_uuid) < 0)
+		return -DER_INVAL;
+
+	tok = strtok(NULL, ".");
+	/* a uuid without the ".hlc" part leaves no second token to convert */
+	if (tok == NULL)
+		return -DER_INVAL;
+	dtx_id->dti_hlc = strtoll(tok, NULL, 16);
+
+	/* Validate input was complete and in correct format */
+	snprintf(validate_buf, 128, DF_DTIF, DP_DTI(dtx_id));
+	if (strncmp(dtx_id_str, validate_buf, 128) != 0)
+		return -DER_INVAL;
+
+	return DER_SUCCESS;
+}
+
+/*
+ * A key can be a string, integer, or arbitrary binary data in hex format. The following functions
+ * parse a string input (usually provided in a VOS path) into the appropriate daos_key_t. In order
+ * for a string to match when doing a fetch, it must be exactly the same, including the iov_len of
+ * the key. The DDB help output explains the expected format of the string.
+ *
+ * When a string is parsed into a key, the key buffer will be allocated to the appropriate size.
+ *
+ */
+
+/* Value types a typed (non-string) key can have: an unsigned integer width or binary */
+enum key_value_type {
+	KEY_VALUE_TYPE_UNKNOWN,
+	KEY_VALUE_TYPE_UINT8,
+	KEY_VALUE_TYPE_UINT16,
+	KEY_VALUE_TYPE_UINT32,
+	KEY_VALUE_TYPE_UINT64,
+	KEY_VALUE_TYPE_BIN,
+};
+
+/*
+ * Allocate a buffer of value_len bytes for key and copy value into it.
+ * Returns 0 on success, otherwise the error from daos_iov_alloc().
+ */
+static int
+key_alloc(daos_key_t *key, void *value, uint32_t value_len)
+{
+	int rc = daos_iov_alloc(key, value_len, true);
+
+	/* only touch the buffer if the allocation worked */
+	if (SUCCESS(rc))
+		memcpy(key->iov_buf, value, value_len);
+
+	return rc;
+}
+
+/*
+ * If input starts with type_str, record type_value in type and advance input past the
+ * type name (i.e. "uint32" selects KEY_VALUE_TYPE_UINT32).
+ */
+#define if_type_is_set(input, type, type_str, type_value) do { \
+	if (strncmp(input, type_str, strlen(type_str)) == 0) { \
+		type = type_value; \
+		input += strlen(type_str); \
+	} \
+} while (0)
+
+/*
+ * If t equals t_enum, convert key_str with strtoul() into a value of the given C type
+ * and allocate the key from it; rc receives the result of the allocation.
+ */
+#define if_type_is_parse(t, t_enum, type, key_str, key, rc) do { \
+	if (t == t_enum) { \
+		type value = strtoul(key_str, NULL, 0); \
+		rc = key_alloc(key, &value, sizeof(value)); \
+	} \
+} while (0)
+
+/*
+ * The format of the size portion of the key is a number surrounded by the open and close
+ * characters, generally '()' or '{}'.
+ *
+ * For example, a string key can have a size provided to specify the length of the key. This is
+ * needed if strlen(key_str) != iov_len. For example if a null terminator is included as part
+ * of the key.
+ *
+ * Return number of chars consumed, or error
+ */
+static int
+key_parse_size(const char *input, size_t *size, char open, char close)
+{
+	const char *value_str;
+	int len = 0;
+
+	if (input[0] != open)
+		return -DER_INVAL;
+
+	input++;
+
+	value_str = input;
+
+	/* <ctype.h> functions require a value representable as unsigned char */
+	while (isdigit((unsigned char)input[len]))
+		len++;
+	input += len;
+
+	if (input[0] != close)
+		return -DER_INVAL;
+	*size = strtoul(value_str, NULL, 10);
+	/* also rejects an empty size such as "()" */
+	if (*size == 0)
+		return -DER_INVAL;
+
+	return len + 2; /* +2 for the open and close characters */
+}
+
+/*
+ * Tests if the string input looks like it could be a hex number (starts with '0x' or
+ * '0X' and has at least one more character after the prefix)
+ */
+static inline bool
+is_hex(const char *input)
+{
+	if (strlen(input) <= 2)
+		return false;
+	return (input[0] == '0' && (input[1] == 'x' || input[1] == 'X'));
+}
+
+/*
+ * Parse a key that is arbitrary binary data represented as hex. Every pair of hex
+ * digits after the "0x" prefix becomes one byte of the key; parsing stops at the first
+ * character that is not a hex digit.
+ *
+ * Returns 0 on success, -DER_INVAL if the input is not hex, has no digits or has an odd
+ * number of digits, -DER_NOMEM on allocation failure.
+ */
+static int
+key_parse_bin(const char *input, daos_key_t *key)
+{
+	uint8_t *buf;
+	size_t len = 0;
+	size_t data_len;
+	size_t i;
+
+	if (!is_hex(input)) {
+		D_ERROR("binary data should be represented as hex\n");
+		return -DER_INVAL;
+	}
+	input += 2;
+	while (isxdigit((unsigned char)input[len]))
+		len++;
+
+	/* "0x" followed by something that isn't a hex digit: nothing to store */
+	if (len == 0) {
+		D_ERROR("binary data should be represented as hex\n");
+		return -DER_INVAL;
+	}
+
+	if (len % 2 != 0) {
+		D_ERROR("incomplete bytes not supported. Please prepend leading 0\n");
+		return -DER_INVAL;
+	}
+
+	data_len = len / 2;
+	D_ALLOC(buf, data_len);
+	if (buf == NULL)
+		return -DER_NOMEM;
+
+	for (i = 0; i < len; i += 2) {
+		char tmp[3] = {0};
+		uint8_t byte;
+
+		tmp[0] = input[i];
+		tmp[1] = input[i + 1];
+		byte = strtoul(tmp, NULL, 16);
+		buf[i/2] = byte;
+	}
+
+	d_iov_set(key, buf, data_len);
+	return 0;
+}
+
+/*
+ * Parse a key that is an int. The key is allocated with the width that matches type.
+ * Returns -DER_INVAL if type is not one of the integer types.
+ */
+static int
+key_parse_int(enum key_value_type type, const char *input, daos_key_t *key)
+{
+	int rc = -DER_INVAL;
+
+	if_type_is_parse(type, KEY_VALUE_TYPE_UINT8, uint8_t, input, key, rc);
+	if_type_is_parse(type, KEY_VALUE_TYPE_UINT16, uint16_t, input, key, rc);
+	if_type_is_parse(type, KEY_VALUE_TYPE_UINT32, uint32_t, input, key, rc);
+	if_type_is_parse(type, KEY_VALUE_TYPE_UINT64, uint64_t, input, key, rc);
+
+	return rc;
+}
+
+/*
+ * Parse a non-string key (integer or binary).
+ *
+ * Both integers and binary keys have similar format: "{type:value}", where type is the last part
+ * of the key_value_type enum (as lowercase). Binary can also include a size: "{bin(size):0x1234}"
+ * No whitespace is accepted between the parts. The size is validated but otherwise unused.
+ *
+ * Return number of chars consumed, or error
+ */
+static int
+key_parse_typed(const char *key_str, daos_key_t *key)
+{
+	enum key_value_type type = KEY_VALUE_TYPE_UNKNOWN;
+	const char *value_str;
+	size_t size = 0;
+	int rc;
+	const char *key_str_idx;
+
+	key_str_idx = key_str;
+	if (key_str_idx[0] != '{')
+		return -DER_INVAL;
+
+	key_str_idx++;
+
+	/* get the specific type */
+	if_type_is_set(key_str_idx, type, "uint8", KEY_VALUE_TYPE_UINT8);
+	if_type_is_set(key_str_idx, type, "uint16", KEY_VALUE_TYPE_UINT16);
+	if_type_is_set(key_str_idx, type, "uint32", KEY_VALUE_TYPE_UINT32);
+	if_type_is_set(key_str_idx, type, "uint64", KEY_VALUE_TYPE_UINT64);
+	if_type_is_set(key_str_idx, type, "bin", KEY_VALUE_TYPE_BIN);
+	if (type == KEY_VALUE_TYPE_UNKNOWN)
+		return -DER_INVAL;
+
+	/* is there a size */
+	if (key_str_idx[0] == '(') {
+		rc = key_parse_size(key_str_idx, &size, '(', ')');
+		if (rc < 0)
+			return rc;
+		key_str_idx += rc;
+	}
+
+	if (key_str_idx[0] != ':') /* ':' should separate the type and value */
+		return -DER_INVAL;
+
+	key_str_idx++;
+
+	value_str = key_str_idx;
+
+	/* have key value ... just verifying the rest is valid number */
+	if (is_hex(key_str_idx)) {
+		key_str_idx += 2;
+		while (isxdigit((unsigned char)key_str_idx[0]))
+			key_str_idx++;
+	} else {
+		while (isdigit((unsigned char)key_str_idx[0]))
+			key_str_idx++;
+	}
+
+	if (key_str_idx[0] != '}')
+		return -DER_INVAL;
+	key_str_idx++;
+
+	if (type == KEY_VALUE_TYPE_BIN)
+		rc = key_parse_bin(value_str, key);
+	else
+		rc = key_parse_int(type, value_str, key);
+
+	if (!SUCCESS(rc))
+		return rc;
+	return key_str_idx - key_str;
+}
+
+/*
+ * Parse a string key.
+ * String keys need to be able to support specifying size of the key and to escape special
+ * characters ('{', '}', '/'). A backslash escapes the character that follows it, including
+ * another backslash. The key ends at the first unescaped '/' or at the end of the input.
+ *
+ * Return number of chars consumed, or error
+ */
+static int
+key_parse_str(const char *input, daos_key_t *key)
+{
+	size_t key_len = 0;
+	size_t size = 0;
+	uint32_t escaped_chars = 0;
+	size_t i, j;
+	int rc;
+	const char *ptr;
+
+	/* size_open char can't be curly brace */
+	if (input[0] == '{' || input[0] == '}')
+		return -DER_INVAL;
+
+	ptr = input;
+	while (ptr[0] != '\0' && ptr[0] != '/') {
+		if (ptr[0] == '\\') {
+			ptr += 1; /* move past escape character */
+			if (ptr[0] == '\0') /* escape character can't be last */
+				return -DER_INVAL;
+			/* don't really care what escaping as long as not the end */
+			ptr += 1;
+			escaped_chars++;
+			key_len++;
+
+		} else if (ptr[0] == '}') {
+			return -DER_INVAL; /* should never see this here */
+		} else if (ptr[0] == '{') {
+			rc = key_parse_size(ptr, &size, '{', '}');
+			if (rc < 0)
+				return rc;
+			ptr += rc;
+			if (ptr[0] != '\0' && ptr[0] != '/') /* size should be last thing */
+				return -DER_INVAL;
+		} else {
+			ptr++;
+			key_len++;
+		}
+	}
+	if (size == 0)
+		size = key_len;
+	if (size < key_len)
+		return -DER_INVAL;
+
+	rc = daos_iov_alloc(key, size, true);
+	if (!SUCCESS(rc))
+		return -DER_NOMEM;
+
+	for (i = 0, j = 0; i < key_len + escaped_chars; ++i) {
+		/*
+		 * Drop the escape character but always keep the character it escapes,
+		 * so that an escaped backslash ends up in the key. The scan above
+		 * guarantees that an escape character is never the last one.
+		 */
+		if (input[i] == '\\')
+			++i;
+		((char *)key->iov_buf)[j++] = input[i];
+	}
+
+	return (int)(ptr - input);
+}
+
+/*
+ * Parse string input into a daos_key_t. The buffer for the key will be allocated. The caller
+ * is expected to call daos_iov_free() to free the memory. Input that starts with '{' is parsed
+ * as a typed (integer or binary) key, anything else as a string key.
+ *
+ * Return number of chars consumed, or error
+ */
+int
+ddb_parse_key(const char *input, daos_key_t *key)
+{
+	if (input == NULL || strlen(input) == 0)
+		return -DER_INVAL;
+
+	return input[0] == '{' ?
+		key_parse_typed(input, key) :
+		key_parse_str(input, key);
+}
diff --git a/src/ddb/ddb_parse.h b/src/ddb/ddb_parse.h
new file mode 100644
index 00000000000..376df83387b
--- /dev/null
+++ b/src/ddb/ddb_parse.h
@@ -0,0 +1,53 @@
+/**
+ * (C) Copyright 2019-2022 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#ifndef __DAOS_DDB_PARSE_H
+#define __DAOS_DDB_PARSE_H
+
+#include
+#include
+#include
+#include
+#include
+#include "ddb_common.h"
+
+struct program_args {
+	char *pa_cmd_file;
+	char *pa_r_cmd_run;
+	char *pa_pool_path;
+	bool pa_write_mode;
+	bool pa_get_help;
+};
+
+struct vos_file_parts {
+	char vf_db_path[64];
+	uuid_t vf_pool_uuid;
+	char vf_vos_file[16];
+	uint32_t vf_target_idx;
+};
+
+/* Parse a path to a VOS file to get needed parts for initializing vos */
+int vos_path_parse(const char *path, struct vos_file_parts *vos_file_parts);
+
+/* Parse a string into an array of words with the count of words */
+int ddb_str2argv_create(const char *buf, struct argv_parsed *parse_args);
+
+/* Free resources used for str2argv */
+void ddb_str2argv_free(struct argv_parsed *parse_args);
+
+/* Parse argc/argv into the program arguments/options */
+int ddb_parse_program_args(struct ddb_ctx *ctx, uint32_t argc, char **argv,
+			   struct program_args *pa);
+
+/* See ddb_iov_to_printable_buf for how the keys will be printed */
+int ddb_parse_key(const char *input, daos_key_t *key);
+
+/* Parse a string into the parts of a dtx_id. See DF_DTIF for how the format of the dtx_id is
+ * expected to be.
+ */ +int ddb_parse_dtx_id(const char *dtx_id_str, struct dtx_id *dtx_id); + +#endif /** __DAOS_DDB_PARSE_H */ diff --git a/src/ddb/ddb_printer.c b/src/ddb/ddb_printer.c new file mode 100644 index 00000000000..a78ce92e37b --- /dev/null +++ b/src/ddb/ddb_printer.c @@ -0,0 +1,227 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include "ddb_printer.h" + +static void +print_indent(struct ddb_ctx *ctx, int c) +{ + int i; + + for (i = 0; i < c; i++) + ddb_print(ctx, " "); +} + +bool +ddb_can_print(d_iov_t *iov) +{ + char *str = iov->iov_buf; + uint32_t len = iov->iov_len; + int i; + + for (i = 0 ; i < len ; i++) { + if (str[i] == '\0') + return true; + if (!isprint(str[i]) && str[i] != '\n' && str[i] != '\r') + return false; + } + return true; +} + +/* + * Converts contents of an iov to something that is more printable. + * + * Returns number of characters that would have been written if buf_len was long + * enough, not including null terminator + */ +int +ddb_iov_to_printable_buf(d_iov_t *iov, char buf[], uint32_t buf_len) +{ + if (iov->iov_len == 0 || iov->iov_buf == NULL) + return 0; + + if (ddb_can_print(iov)) + return snprintf(buf, buf_len, "%.*s", (int)iov->iov_len, (char *)iov->iov_buf); + + switch (iov->iov_len) { + case sizeof(uint8_t): + return snprintf(buf, buf_len, "uint8:0x%x", ((uint8_t *)iov->iov_buf)[0]); + case sizeof(uint16_t): + return snprintf(buf, buf_len, "uint16:0x%04hx", ((uint16_t *)iov->iov_buf)[0]); + case sizeof(uint32_t): + return snprintf(buf, buf_len, "uint32:0x%x", ((uint32_t *)iov->iov_buf)[0]); + case sizeof(uint64_t): + return snprintf(buf, buf_len, "uint64:0x%lx", ((uint64_t *)iov->iov_buf)[0]); + default: + { + char tmp_buf[32]; + uint32_t new_len; + uint32_t result = 0; + int i; + + result += snprintf(buf, buf_len, "bin(%lu):0x", iov->iov_len); + + for (i = 0; i < iov->iov_len; i++) { + new_len = snprintf(tmp_buf, ARRAY_SIZE(tmp_buf), "%02x", + ((uint8_t 
*)iov->iov_buf)[i]); + if (new_len + result > buf_len) { + /* Buffer not big enough */ + result += new_len; + } else { + result += sprintf(buf + result, "%s", tmp_buf); + } + } + + if (result > buf_len) { + buf[buf_len - 1] = '\0'; + buf[buf_len - 2] = '.'; + buf[buf_len - 3] = '.'; + buf[buf_len - 4] = '.'; + + } + return result; + } + } +} + +void +ddb_print_cont(struct ddb_ctx *ctx, struct ddb_cont *cont) +{ + ddb_printf(ctx, DF_IDX" "DF_UUIDF"\n", DP_IDX(cont->ddbc_idx), + DP_UUID(cont->ddbc_cont_uuid)); +} + +void +ddb_print_obj(struct ddb_ctx *ctx, struct ddb_obj *obj, uint32_t indent) +{ + print_indent(ctx, indent); + ddb_printf(ctx, DF_IDX" '"DF_OID"' (type: %s, groups: %d)\n", + DP_IDX(obj->ddbo_idx), + DP_OID(obj->ddbo_oid), + obj->ddbo_otype_str, + obj->ddbo_nr_grps); +} + +void +ddb_print_key(struct ddb_ctx *ctx, struct ddb_key *key, uint32_t indent) +{ + const uint32_t buf_len = 64; + char buf[buf_len]; + + memset(buf, 0, buf_len); + + ddb_iov_to_printable_buf(&key->ddbk_key, buf, buf_len); + + print_indent(ctx, indent); + if (ddb_can_print(&key->ddbk_key)) { + ddb_printf(ctx, DF_IDX" '%s' (%lu)%s\n", + DP_IDX(key->ddbk_idx), + buf, + key->ddbk_key.iov_len, + key->ddbk_child_type == VOS_ITER_SINGLE ? " (SV)" : + key->ddbk_child_type == VOS_ITER_RECX ? " (ARRAY)" : ""); + return; + } + + ddb_printf(ctx, DF_IDX" {%s}%s\n", DP_IDX(key->ddbk_idx), buf, + key->ddbk_child_type == VOS_ITER_SINGLE ? " (SV)" : + key->ddbk_child_type == VOS_ITER_RECX ? 
/*
 * Format a byte count in human readable form: a whole number followed by one of
 * B, KB, MB, GB or TB.
 *
 * The value is divided by 1024 (integer division, so the fraction is dropped) until it is
 * below 1024 or the largest unit is reached. Values of 1024 TB and above are reported in
 * TB rather than indexing past the end of the unit table.
 *
 * bytes	value to format
 * buf		destination buffer
 * buf_len	size of buf; output is truncated by snprintf() if it does not fit
 */
void
ddb_bytes_hr(uint64_t bytes, char *buf, uint32_t buf_len)
{
	static const char *const	units[] = {"B", "KB", "MB", "GB", "TB"};
	const size_t			last_unit = sizeof(units) / sizeof(units[0]) - 1;
	size_t				i = 0;

	while (bytes >= 1024 && i < last_unit) {
		bytes /= 1024;
		i++;
	}
	snprintf(buf, buf_len, "%lu%s", bytes, units[i]);
}
ddb_printf(ctx, "\tStatus: %s (%d)\n", entry->die_status_str, entry->die_status); + ddb_printf(ctx, "\tEpoch: %lu\n", entry->die_epoch); + ddb_printf(ctx, "\tTxn ID: %d\n", entry->die_tx_id); +} + +void +ddb_print_dtx_committed(struct ddb_ctx *ctx, struct dv_dtx_committed_entry *entry) +{ + ddb_printf(ctx, "ID: "DF_DTIF"\n", DP_DTI(&entry->ddtx_id)); + ddb_printf(ctx, "\tEpoch: "DF_U64"\n", entry->ddtx_epoch); +} + +void +ddb_print_dtx_active(struct ddb_ctx *ctx, struct dv_dtx_active_entry *entry) +{ + ddb_printf(ctx, "ID: "DF_DTIF"\n", DP_DTI(&entry->ddtx_id)); + ddb_printf(ctx, "\tEpoch: "DF_U64"\n", entry->ddtx_epoch); + ddb_printf(ctx, "\tHandle Time: "DF_U64"\n", entry->ddtx_handle_time); + ddb_printf(ctx, "\tGrp Cnt: %d\n", entry->ddtx_grp_cnt); + ddb_printf(ctx, "\tVer: %d\n", entry->ddtx_ver); + ddb_printf(ctx, "\tRec Cnt: %d\n", entry->ddtx_rec_cnt); + ddb_printf(ctx, "\tMbs Flags: %d\n", entry->ddtx_mbs_flags); + ddb_printf(ctx, "\tFlags: %d\n", entry->ddtx_flags); + ddb_printf(ctx, "\tOid: "DF_UOID"\n", DP_UOID(entry->ddtx_oid)); +} diff --git a/src/ddb/ddb_printer.h b/src/ddb/ddb_printer.h new file mode 100644 index 00000000000..5c6cc5a7332 --- /dev/null +++ b/src/ddb/ddb_printer.h @@ -0,0 +1,33 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef DAOS_DDB_PRINTER_H +#define DAOS_DDB_PRINTER_H + +#include "ddb_vos.h" + +#define DF_IDX "[%d]" +#define DP_IDX(idx) idx + +int ddb_iov_to_printable_buf(d_iov_t *iov, char buf[], uint32_t buf_len); +void ddb_print_cont(struct ddb_ctx *ctx, struct ddb_cont *cont); +void ddb_print_obj(struct ddb_ctx *ctx, struct ddb_obj *obj, uint32_t indent); +void ddb_print_key(struct ddb_ctx *ctx, struct ddb_key *key, uint32_t indent); +void ddb_print_sv(struct ddb_ctx *ctx, struct ddb_sv *sv, uint32_t indent); +void ddb_print_array(struct ddb_ctx *ctx, struct ddb_array *sv, uint32_t indent); +void ddb_print_path(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp, uint32_t indent); +void ddb_print_superblock(struct ddb_ctx *ctx, struct ddb_superblock *sb); +void ddb_print_ilog_entry(struct ddb_ctx *ctx, struct ddb_ilog_entry *entry); +void ddb_print_dtx_committed(struct ddb_ctx *ctx, struct dv_dtx_committed_entry *entry); +void ddb_print_dtx_active(struct ddb_ctx *ctx, struct dv_dtx_active_entry *entry); + +bool ddb_can_print(d_iov_t *iov); + +/* some utility functions helpful for printing */ +void ddb_bytes_hr(uint64_t bytes, char *buf, uint32_t buf_len); + + +#endif /* DAOS_DDB_PRINTER_H */ diff --git a/src/ddb/ddb_spdk.c b/src/ddb/ddb_spdk.c new file mode 100644 index 00000000000..74892b9b3cc --- /dev/null +++ b/src/ddb/ddb_spdk.c @@ -0,0 +1,561 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ddb_common.h" +#include "ddb_spdk.h" + +#define TRACE(...) D_DEBUG(DB_TRACE, __VA_ARGS__) + +/* + * According to https://spdk.io/doc/concurrency.html, the best way to manage concurrency is to + * use a state machine. 
+ */ + +/* States of the machine */ +enum DDB_SPDK_ST { + DDB_SPDK_ST_BDEV, + DDB_SPDK_ST_BS_OPEN_ASYNC, + DDB_SPDK_ST_BLOB_ITER_ASYNC, + DDB_SPDK_ST_BLOB_READ_ASYNC, + DDB_SPDK_ST_BLOB_CLOSE_ASYNC, + DDB_SPDK_ST_SEND_INFO, + DDB_SPDK_ST_BS_CLOSE_ASYNC, + DDB_SPDK_ST_DONE, +}; + +/* Just for debugging purposes */ +static const char * +state_str(enum DDB_SPDK_ST s) +{ + switch (s) { + case DDB_SPDK_ST_BDEV: + return "DDB_SPDK_ST_BDEV"; + case DDB_SPDK_ST_BS_OPEN_ASYNC: + return "DDB_SPDK_ST_BS_OPEN_ASYNC"; + case DDB_SPDK_ST_BLOB_ITER_ASYNC: + return "DDB_SPDK_ST_BLOB_ITER_ASYNC"; + case DDB_SPDK_ST_BLOB_READ_ASYNC: + return "DDB_SPDK_ST_BLOB_READ_ASYNC"; + case DDB_SPDK_ST_SEND_INFO: + return "DDB_SPDK_ST_SEND_INFO"; + case DDB_SPDK_ST_BLOB_CLOSE_ASYNC: + return "DDB_SPDK_ST_BLOB_CLOSE_ASYNC"; + case DDB_SPDK_ST_BS_CLOSE_ASYNC: + return "DDB_SPDK_ST_BS_CLOSE_ASYNC"; + case DDB_SPDK_ST_DONE: + return "DDB_SPDK_ST_DONE"; + default: + return "UNKNOWN"; + } +} + +static void +print_transition(enum DDB_SPDK_ST a, enum DDB_SPDK_ST b, int rc) +{ + if (a != b) + TRACE("%s -> %s, rc: "DF_RC"\n", state_str(a), state_str(b), DP_RC(rc)); +} + +#define BDEV_NAME_MAX 128 +struct ddb_spdk_context { + /* Used for passing info back to the caller */ + struct ddbs_sync_info dsc_dsi; + ddbs_sync_cb dsc_cb_func; + void *dsc_cb_arg; + + /* For managing the interaction with spdk */ + struct spdk_bdev *dsc_bdev; + struct spdk_bs_dev *dsc_bs_dev; + struct spdk_blob_store *dsc_bs; + struct spdk_blob *dsc_blob; + struct spdk_io_channel *dsc_channel; + uint8_t *dsc_read_buf; + uint64_t dsc_io_unit_size; + + /* For managing the state machine */ + enum DDB_SPDK_ST dsc_state; + bool dsc_async_state_done; + bool dsc_running; + + /* Capture any error along the way */ + int dsc_rc; +}; + +static int +dsc_init(struct ddb_spdk_context **ctx) +{ + D_ALLOC_PTR(*ctx); + + if (*ctx == NULL) { + D_ERROR("Could not alloc ctx\n"); + return -DER_NOMEM; + } + + return 0; +} + +static void +dsc_fini(struct 
ddb_spdk_context *ctx) +{ + D_FREE(ctx); +} + +/* Setup what's needed to do a blob read */ +static int +dsc_read_setup(struct ddb_spdk_context *ctx) { + D_ASSERT(ctx->dsc_bs != NULL); + ctx->dsc_channel = spdk_bs_alloc_io_channel(ctx->dsc_bs); + if (ctx->dsc_channel == NULL) + return -DER_NOMEM; + + ctx->dsc_io_unit_size = spdk_bs_get_io_unit_size(ctx->dsc_bs); + ctx->dsc_read_buf = spdk_malloc(ctx->dsc_io_unit_size, 0x1000, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (ctx->dsc_read_buf == NULL) + return -DER_NOMEM; + + return 0; +} + +static void +dsc_read_teardown(struct ddb_spdk_context *ctx) +{ + if (ctx->dsc_channel) { + spdk_bs_free_io_channel(ctx->dsc_channel); + ctx->dsc_channel = NULL; + } + if (ctx->dsc_read_buf) { + spdk_free(ctx->dsc_read_buf); + ctx->dsc_read_buf = NULL; + } + ctx->dsc_io_unit_size = 0; +} + +static void +dsc_record_error(struct ddb_spdk_context *ctx, int bs_errno) +{ + /* only keep error if there is one and rc isn't already an error code */ + if (bs_errno != 0 && ctx->dsc_rc == 0) { + ctx->dsc_rc = daos_errno2der(-bs_errno); + TRACE("Recording error: "DF_RC"\n", DP_RC(ctx->dsc_rc)); + } +} + +/* + * The next section of functions are callback/async pairs. SPDK relies heavily on callback + * functions. The "async" function executes an SPDK function which takes a callback. In general, + * the callback function should come right before the function that executes the SPDK function. The + * callback function will record any error. Due to the async nature, the state machine might exit + * before the callbacks are executed by SPDK, therefore the callbacks may restart the state + * machine at the appropriate state if needed. The 'before'/'after' tracing in the async methods + * with the 'callback' trace is helpful in seeing how the async works which was critical in + * developing and debugging the state machine as well as viewing the behavior of SPDK. 
+ */ + +/* Allow the callback functions to restart the state machine */ +static void dsc_continue_state_machine_after_async(struct ddb_spdk_context *ctx); + +static void +blob_close_cb(void *cb_arg, int bs_errno) +{ + struct ddb_spdk_context *ctx = cb_arg; + + TRACE("blob close callback\n"); + + dsc_record_error(ctx, bs_errno); + dsc_continue_state_machine_after_async(ctx); +} + +static void +dsc_blob_close_async(struct ddb_spdk_context *ctx) +{ + TRACE("blob close (before)\n"); + spdk_blob_close(ctx->dsc_blob, blob_close_cb, ctx); + TRACE("blob close (after)\n"); +} + +static void +bs_open_complete_cb(void *cb_arg, struct spdk_blob_store *bs, int bs_errno) +{ + struct ddb_spdk_context *ctx = cb_arg; + + TRACE("bs open callback\n"); + if (!SUCCESS(bs_errno)) { + dsc_record_error(ctx, bs_errno); + return; + } + ctx->dsc_bs = bs; + + /* now setup for reading */ + ctx->dsc_rc = dsc_read_setup(ctx); + dsc_continue_state_machine_after_async(ctx); +} + +static void +dsc_bs_open_async(struct ddb_spdk_context *ctx) +{ + TRACE("bs open (before)\n"); + spdk_bs_load(ctx->dsc_bs_dev, NULL, bs_open_complete_cb, ctx); + TRACE("bs open (close)\n"); +} + +static void +bs_close_cb(void *cb_arg, int bs_errno) +{ + struct ddb_spdk_context *ctx = cb_arg; + + TRACE("bs close callback\n"); + dsc_record_error(ctx, bs_errno); + ctx->dsc_bs = NULL; + + dsc_continue_state_machine_after_async(ctx); +} + +static void +dsc_bs_close_async(struct ddb_spdk_context *ctx) { + dsc_read_teardown(ctx); + + if (ctx->dsc_bs) { + TRACE("close bs (before)\n"); + spdk_bs_unload(ctx->dsc_bs, bs_close_cb, ctx); + TRACE("close bs (after)\n"); + } else { + TRACE("bs already closed??\n"); + } +} + +static void +dsc_blob_iter_cb(void *cb_arg, struct spdk_blob *blb, int bs_errno) +{ + struct ddb_spdk_context *ctx = cb_arg; + + TRACE("blob iter callback\n"); + + if (bs_errno != 0) { + /* + * No more blobs to process. This will indicate to + * the state machine to close the blobstore. 
+ */ + ctx->dsc_blob = NULL; + if (bs_errno != -ENOENT) { + dsc_record_error(ctx, bs_errno); + TRACE("error\n"); + } else { + TRACE("No more blobs\n"); + } + } else { + TRACE("setting blob\n"); + ctx->dsc_blob = blb; + } + dsc_continue_state_machine_after_async(ctx); +} + +static void +dsc_blob_iter_async(struct ddb_spdk_context *ctx) +{ + D_ASSERT(ctx->dsc_bs != NULL); + + if (ctx->dsc_blob == NULL) { + TRACE("first blob (before)\n"); + spdk_bs_iter_first(ctx->dsc_bs, dsc_blob_iter_cb, ctx); + TRACE("first blob (after)\n"); + } else { + TRACE("next blob (before)\n"); + spdk_bs_iter_next(ctx->dsc_bs, ctx->dsc_blob, dsc_blob_iter_cb, ctx); + TRACE("next blob (after)\n"); + } +} + +static void +blob_read_hdr_cb(void *cb_arg, int bs_errno) +{ + struct ddb_spdk_context *ctx = cb_arg; + + TRACE("read blob callback\n"); + + dsc_record_error(ctx, bs_errno); + if (bs_errno == 0) { + struct bio_blob_hdr *hdr; + + D_ASSERT(ctx->dsc_read_buf != NULL); + hdr = (struct bio_blob_hdr *) ctx->dsc_read_buf; + /* verify the header */ + if (hdr->bbh_magic == BIO_BLOB_HDR_MAGIC) { + ctx->dsc_dsi.dsi_hdr = hdr; + } else { + D_PRINT("BIO_BLOB_HDR_MAGIC is not correct for blob id '%lu'. 
" + "Got '%x' but expected '%x'\n", + spdk_blob_get_id(ctx->dsc_blob), hdr->bbh_magic, + BIO_BLOB_HDR_MAGIC); + ctx->dsc_rc = -DER_UNKNOWN; + } + } + + dsc_continue_state_machine_after_async(ctx); +} + +static void +dsc_blob_read_hdr_async(struct ddb_spdk_context *ctx) +{ + TRACE("reading blob (before)\n"); + spdk_blob_io_read(ctx->dsc_blob, ctx->dsc_channel, ctx->dsc_read_buf, 0, 1, + blob_read_hdr_cb, ctx); + TRACE("reading blob (after)\n"); +} + +static void +base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *cb_arg) +{ + D_WARN("Unsupported bdev event type: %d\n", type); +} + +static void +dsc_bdev(struct ddb_spdk_context *ctx) +{ + char bdev_name[BDEV_NAME_MAX]; + int err; + int rc = 0; + + if (ctx->dsc_bdev == NULL) + ctx->dsc_bdev = spdk_bdev_first(); + else + ctx->dsc_bdev = spdk_bdev_next(ctx->dsc_bdev); + + if (ctx->dsc_bdev == NULL) + return; + + strncpy(bdev_name, spdk_bdev_get_name(ctx->dsc_bdev), sizeof(bdev_name) - 1); + bdev_name[sizeof(bdev_name) - 1] = '\0'; + + TRACE("Creating bs dev for device name: %s\n", bdev_name); + err = spdk_bdev_create_bs_dev_ext(bdev_name, base_bdev_event_cb, NULL, &ctx->dsc_bs_dev); + if (err != 0) { + rc = daos_errno2der(-err); + D_ERROR("Could not create blob bdev: %s\n", spdk_strerror(-err)); + } + ctx->dsc_rc = rc; +} + +static void +dsc_get_dev_id(struct ddb_spdk_context *ctx) +{ + struct spdk_bs_type bstype = spdk_bs_get_bstype(ctx->dsc_bs); + + memcpy(ctx->dsc_dsi.dsi_dev_id, bstype.bstype, sizeof(ctx->dsc_dsi.dsi_dev_id)); + ctx->dsc_dsi.dsi_cluster_size = spdk_bs_get_cluster_size(ctx->dsc_bs); + ctx->dsc_dsi.dsi_cluster_nr = spdk_blob_get_num_clusters(ctx->dsc_blob); + TRACE("Got device id: "DF_UUID"\n", DP_UUID(ctx->dsc_dsi.dsi_dev_id)); +} + +static void +dsc_send_info(struct ddb_spdk_context *ctx) +{ + dsc_get_dev_id(ctx); + + TRACE("sending info to callback\n"); + ctx->dsc_cb_func(&ctx->dsc_dsi, ctx->dsc_cb_arg); +} + +static void +dsc_if_error_handle_state_change(struct 
ddb_spdk_context *ctx) +{ + enum DDB_SPDK_ST prev_state = ctx->dsc_state; + + if (ctx->dsc_rc == 0) + return; + switch (ctx->dsc_state) { + case DDB_SPDK_ST_BDEV: + ctx->dsc_state = DDB_SPDK_ST_DONE; + break; + case DDB_SPDK_ST_BS_OPEN_ASYNC: + case DDB_SPDK_ST_BLOB_ITER_ASYNC: + ctx->dsc_state = DDB_SPDK_ST_BS_CLOSE_ASYNC; + break; + case DDB_SPDK_ST_BLOB_READ_ASYNC: + ctx->dsc_state = DDB_SPDK_ST_BLOB_CLOSE_ASYNC; + break; + case DDB_SPDK_ST_SEND_INFO: + case DDB_SPDK_ST_BLOB_CLOSE_ASYNC: + case DDB_SPDK_ST_BS_CLOSE_ASYNC: + case DDB_SPDK_ST_DONE: + break; + } + if (prev_state != ctx->dsc_state) + /* Forced a transition so reset async state */ + ctx->dsc_async_state_done = false; + + TRACE("Error State ("DF_RC"): Transitioning from %s --> %s\n", + DP_RC(ctx->dsc_rc), state_str(prev_state), state_str(ctx->dsc_state)); +} + +/* Macros to help define the function for each state and what the next state should be */ +#define ST_TSN(ctx, fn, next_state) \ + do { \ + fn; \ + ctx->dsc_state = next_state; \ + } while (0) +#define ST_TSN_COND(ctx, fn, cond, true_state, false_state) \ + do { \ + fn; \ + if ((cond)) \ + ctx->dsc_state = true_state; \ + else \ + ctx->dsc_state = false_state; \ + } while (0) +#define ST_TSN_ASYNC(ctx, fn, next_state) \ + do { \ + if (ctx->dsc_async_state_done) { \ + ctx->dsc_async_state_done = false; \ + ctx->dsc_state = next_state; \ + } else { \ + fn; \ + } \ + } while (0) +#define ST_TSN_COND_ASYNC(ctx, fn, cond, true_state, false_state) \ + do { \ + if (ctx->dsc_async_state_done) { \ + ctx->dsc_async_state_done = false; \ + if ((cond)) \ + ctx->dsc_state = true_state; \ + else \ + ctx->dsc_state = false_state; \ + } else { \ + fn; \ + } \ + } while (0) + +static void +dsc_run_state_machine(struct ddb_spdk_context *ctx) +{ + enum DDB_SPDK_ST prev_state; + + ctx->dsc_running = true; + + TRACE("\nState Machine starting with state: %s\n", state_str(ctx->dsc_state)); + + do { + dsc_if_error_handle_state_change(ctx); + prev_state = 
ctx->dsc_state; + switch (ctx->dsc_state) { + case DDB_SPDK_ST_BDEV: + ST_TSN_COND(ctx, dsc_bdev(ctx), + /* + * if bdev == NULL then no more devices, everything should + * already be closed so just finish + */ + ctx->dsc_bdev != NULL, DDB_SPDK_ST_BS_OPEN_ASYNC, + DDB_SPDK_ST_DONE); + break; + case DDB_SPDK_ST_BS_OPEN_ASYNC: + ST_TSN_ASYNC(ctx, dsc_bs_open_async(ctx), DDB_SPDK_ST_BLOB_ITER_ASYNC); + break; + case DDB_SPDK_ST_BLOB_ITER_ASYNC: + ST_TSN_COND_ASYNC(ctx, dsc_blob_iter_async(ctx), + /* if blob == NULL then there are no more blobs */ + ctx->dsc_blob == NULL, DDB_SPDK_ST_BS_CLOSE_ASYNC, + DDB_SPDK_ST_BLOB_READ_ASYNC); + break; + case DDB_SPDK_ST_BLOB_READ_ASYNC: + ST_TSN_ASYNC(ctx, dsc_blob_read_hdr_async(ctx), DDB_SPDK_ST_SEND_INFO); + break; + case DDB_SPDK_ST_SEND_INFO: + ST_TSN(ctx, dsc_send_info(ctx), DDB_SPDK_ST_BLOB_ITER_ASYNC); + break; + case DDB_SPDK_ST_BLOB_CLOSE_ASYNC: + /* After closing, start the iteration loop over */ + ST_TSN_ASYNC(ctx, dsc_blob_close_async(ctx), DDB_SPDK_ST_BLOB_ITER_ASYNC); + break; + case DDB_SPDK_ST_BS_CLOSE_ASYNC: + ST_TSN_ASYNC(ctx, dsc_bs_close_async(ctx), DDB_SPDK_ST_BDEV); + break; + case DDB_SPDK_ST_DONE: + spdk_app_stop(ctx->dsc_rc); + break; + } + print_transition(prev_state, ctx->dsc_state, ctx->dsc_rc); + /* + * If the state hasn't changed then leave the state machine. Should + * get called again by a asynchronous callback if not done. Note: if dsc_async_state_done is + * true then the state changed by a callback because the machine always leaves + * dsc_async_state_done to false. + */ + } while ((prev_state != ctx->dsc_state) || ctx->dsc_async_state_done); + TRACE("Leaving state machine on state: %s\n\n", state_str(ctx->dsc_state)); + ctx->dsc_running = false; +} + +static void +dsc_continue_state_machine_after_async(struct ddb_spdk_context *ctx) +{ + /* + * Sometimes the callbacks are run after the state machine leaves and sometimes right after + * the "parent" function is called. 
Callbacks should re-enter the state machine if it's + * not already running. Set a callback done flag so the machine knows if it was already + * called. + */ + ctx->dsc_async_state_done = true; + if (!ctx->dsc_running) { + TRACE("Restarting state machine at state: %s\n", state_str(ctx->dsc_state)); + dsc_run_state_machine(ctx); + } +} + +static void +app_start_cb(void *arg) +{ + struct ddb_spdk_context *ctx = arg; + + /* start by getting the first bdev */ + ctx->dsc_state = DDB_SPDK_ST_BDEV; + dsc_run_state_machine(ctx); +} + +/* + * This is used by the smd sync command for ddb. Most of the SMD table info can be rebuilt by using + * information saved in the SPDK blobs used for each target. + * + * Using the state machine above, will start an spdk_app that will iterate over the blobs, read + * the blob header (a daos construct, see struct bio_blob_hdr), gather other information needed + * from the blob or blobstore and pass to the callback function provided + */ +int +ddbs_for_each_bio_blob_hdr(const char *nvme_json, ddbs_sync_cb cb, void *cb_arg) +{ + struct spdk_app_opts opts = {0}; + struct ddb_spdk_context *ctx = NULL; + int rc; + + D_ASSERT(cb != NULL); + + rc = dsc_init(&ctx); + if (!SUCCESS(rc)) + return rc; + ctx->dsc_cb_func = cb; + ctx->dsc_cb_arg = cb_arg; + + spdk_app_opts_init(&opts, sizeof(opts)); + opts.print_level = SPDK_LOG_DISABLED; + opts.name = "ddb_spdk"; + opts.json_config_file = nvme_json; + rc = spdk_app_start(&opts, app_start_cb, ctx); + if (!SUCCESS(rc)) + D_ERROR("Failed: "DF_RC"\n", DP_RC(rc)); + + dsc_fini(ctx); + + spdk_app_fini(); + return rc; +} diff --git a/src/ddb/ddb_spdk.h b/src/ddb/ddb_spdk.h new file mode 100644 index 00000000000..03b080f0676 --- /dev/null +++ b/src/ddb/ddb_spdk.h @@ -0,0 +1,23 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef DAOS_DDB_SPDK_H +#define DAOS_DDB_SPDK_H + +#include + +struct ddbs_sync_info { + struct bio_blob_hdr *dsi_hdr; + uuid_t dsi_dev_id; + uint64_t dsi_cluster_size; + uint64_t dsi_cluster_nr; +}; + +typedef void (*ddbs_sync_cb)(struct ddbs_sync_info *dsi, void *cb_arg); + +int ddbs_for_each_bio_blob_hdr(const char *nvme_json, ddbs_sync_cb cb, void *cb_arg); + +#endif /* DAOS_DDB_SPDK_H */ diff --git a/src/ddb/ddb_tree_path.c b/src/ddb/ddb_tree_path.c new file mode 100644 index 00000000000..c564b62bb7e --- /dev/null +++ b/src/ddb/ddb_tree_path.c @@ -0,0 +1,983 @@ +/** + * (C) Copyright 2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include "ddb_tree_path.h" +#include "ddb_printer.h" +#include "ddb_parse.h" + +/* + * ------------------------------------------------------ + * Functions for parsing a path into the structure + * ------------------------------------------------------ + */ + +/* this should not be used for parsing string keys ... 
doesn't check for escaped chars */ +static int +str_part_len(const char *p) +{ + int i = 0; + + while (p[i] != '\0' && p[i] != '/') + i++; + + return i; +} + +/* parse the string to a bracketed index "[123]" */ +int +try_parse_idx(const char *str, uint32_t *idx) +{ + uint32_t str_len; + + D_ASSERT(str); + str_len = str_part_len(str); + + if (str_len < 3) /* must be at least 3 chars */ + return -DER_INVAL; + + if (str[0] == '[' && str[str_len - 1] == ']') { + *idx = atol(str + 1); + return str_len; + } + return -DER_INVAL; +} + +int +parse_cont(const char *cont, struct dv_indexed_tree_path *itp) +{ + uuid_t cont_uuid; + char cont_uuid_str[DAOS_UUID_STR_SIZE] = {0}; + int rc; + int i; + + if (str_part_len(cont) == 0) + return 0; + + if (cont[0] == '[') { + uint32_t idx; + + rc = try_parse_idx(cont, &idx); + if (rc < 0) + return -DDBER_INVALID_CONT; + itp_set_cont_idx(itp, idx); + return rc; + } + + for (i = 0; i < min(DAOS_UUID_STR_SIZE, strlen(cont)); ++i) + cont_uuid_str[i] = cont[i]; + cont_uuid_str[DAOS_UUID_STR_SIZE - 1] = '\0'; + rc = uuid_parse(cont_uuid_str, cont_uuid); + if (rc != 0) + return -DDBER_INVALID_CONT; + if (!itp_set_cont_part_value(itp, cont_uuid)) + return -DDBER_INVALID_CONT; + + return DAOS_UUID_STR_SIZE - 1; /* don't include the '\0' */ +} + +int +parse_oid(const char *oid_str, struct dv_indexed_tree_path *itp) +{ + uint64_t oid_parts[4] = {0}; /* 4 parts to the oid */ + const char *oid_str_idx = oid_str; + daos_unit_oid_t oid = {0}; + int i; + int rc; + + if (strlen(oid_str) == 0) + return 0; + + if (oid_str[0] == '[') { + uint32_t idx; + + rc = try_parse_idx(oid_str, &idx); + if (rc < 0) + return -DDBER_INVALID_OBJ; + itp_idx_set(itp, PATH_PART_OBJ, idx); + return rc; + } + + for (i = 0; i < ARRAY_SIZE(oid_parts); ++i) { + if (i > 0 && oid_str_idx[0] != '.') + return -DDBER_INVALID_OBJ; + if (i > 0) + oid_str_idx++; + if (strlen(oid_str_idx) == 0 || oid_str_idx[0] == '/') + /* found end of oid before expected */ + return -DDBER_INVALID_OBJ; 
+ oid_parts[i] = atoll(oid_str_idx); + while (isdigit(oid_str_idx[0])) + oid_str_idx++; + } + + oid.id_pub.hi = oid_parts[0]; + oid.id_pub.lo = oid_parts[1]; + oid.id_shard = oid_parts[2]; + oid.id_layout_ver = oid_parts[3]; + + itp_set_obj_part_value(itp, oid); + + return (int)(oid_str_idx - oid_str); +} + +int +parse_recx(const char *recx_str, struct dv_indexed_tree_path *itp) +{ + daos_recx_t recx = {0}; + const char *dash; + const char *close; + uint64_t lo; + uint64_t hi; + int rc; + + if (strlen(recx_str) == 0) + return 0; + + if (recx_str[0] == '[') { + uint32_t idx; + + rc = try_parse_idx(recx_str, &idx); + if (rc < 0) + return -DDBER_INVALID_RECX; + itp_idx_set(itp, PATH_PART_RECX, idx); + + return rc; + } + + if (recx_str[0] != '{' || recx_str[strlen(recx_str) - 1] != '}') + return -DDBER_INVALID_RECX; + + dash = recx_str + 1; + while (isdigit(dash[0])) + dash++; + + /* found no digits */ + if (dash == recx_str + 1) + return -DDBER_INVALID_RECX; + + if (dash[0] != '-') + return -DDBER_INVALID_RECX; + + close = dash + 1; + while (isdigit(close[0])) + close++; + if (close[0] != '}') + return -DDBER_INVALID_RECX; + + lo = atoll(recx_str + 1); + hi = atoll(dash + 1); + + recx.rx_idx = lo; + recx.rx_nr = hi - lo + 1; + + itp_set_recx_part_value(itp, &recx); + + return strlen(recx_str); +} + +static int +parse_key(const char *key_str, struct dv_indexed_tree_path *itp, enum path_parts key_part) +{ + daos_key_t key = {0}; + int rc; + + if (strlen(key_str) == 0) + return 0; + + /* is an index */ + if (key_str[0] == '[') { + uint32_t idx; + + rc = try_parse_idx(key_str, &idx); + if (rc < 0) + return rc; + itp_idx_set(itp, key_part, idx); + return rc; + } + + rc = ddb_parse_key(key_str, &key); + if (rc < 0) + return rc; + + itp_part_value_set(itp, key_part, &key); + daos_iov_free(&key); + + return rc; +} + +int +itp_parse(const char *path, struct dv_indexed_tree_path *itp) +{ + const char *path_idx; + d_iov_t key = {0}; + int rc; + + /* Setup itp */ + D_ASSERT(itp); 
+ memset(itp, 0, sizeof(*itp)); + + /* If there is no path, leave it empty */ + if (path == NULL || strlen(path) == 0) + return 0; + + path_idx = path; + + if (path_idx[0] == '/') + path_idx++; + + /* Container UUID */ + rc = parse_cont(path_idx, itp); + if (rc < 0) + return rc; + path_idx += rc; + + if (path_idx[0] == '\0') + return 0; + + if (path_idx[0] != '/') + return -DDBER_INVALID_CONT; + path_idx++; + + /* OID */ + rc = parse_oid(path_idx, itp); + if (rc < 0) + return rc; + path_idx += rc; + + if (path_idx[0] == '\0') + return 0; + + if (path_idx[0] != '/') + return -DDBER_INVALID_OBJ; + + /* DKEY */ + path_idx++; + rc = parse_key(path_idx, itp, PATH_PART_DKEY); + if (rc < 0) + return -DDBER_INVALID_DKEY; + itp_set_dkey_part_value(itp, &key); + daos_iov_free(&key); + path_idx += rc; + + if (path_idx[0] == '\0') + return 0; + if (path_idx[0] != '/') + return -DDBER_INVALID_DKEY; + + /* AKEY */ + path_idx++; + rc = parse_key(path_idx, itp, PATH_PART_AKEY); + if (rc < 0) + return -DDBER_INVALID_AKEY; + + path_idx += rc; + + if (path_idx[0] == '\0') + return 0; + if (path_idx[0] != '/') + return -DDBER_INVALID_AKEY; + + /* RECX */ + path_idx++; + rc = parse_recx(path_idx, itp); + + if (rc < 0) + return rc; + + path_idx += rc; + + if (path_idx[0] == '\0') + return 0; + if (path_idx[0] != '/') + return -DDBER_INVALID_RECX; + path_idx++; + if (strlen(path_idx) > 0) + return -DER_INVAL; + return 0; +} + +bool +itp_part_set_cont(union itp_part_type *part, void *part_value) +{ + const uint8_t *cont_uuid = part_value; + + if (cont_uuid == NULL || uuid_is_null(cont_uuid)) + return false; + + uuid_copy(part->itp_uuid, cont_uuid); + return true; +} + +bool +itp_part_set_obj(union itp_part_type *part, void *part_value) +{ + daos_unit_oid_t *oid = part_value; + + if (daos_unit_oid_is_null(*oid)) + return false; + + part->itp_oid = *oid; + return true; +} + +bool +itp_part_set_key(union itp_part_type *part, void *part_value) +{ + daos_key_t *key = part_value; + + if 
(key->iov_len == 0) + return false; + daos_iov_copy(&part->itp_key, key); + return true; +} + +bool +itp_part_set_recx(union itp_part_type *part, void *part_value) +{ + daos_recx_t *recx = part_value; + + if (recx->rx_nr == 0) + return false; + + part->itp_recx = *recx; + return true; +} + +static bool (*part_set_fn[PATH_PART_END])(union itp_part_type *part, void *part_value) = { + itp_part_set_cont, + itp_part_set_obj, + itp_part_set_key, + itp_part_set_key, + itp_part_set_recx, +}; + +bool +itp_part_value_set(struct dv_indexed_tree_path *itp, enum path_parts part_key, void *part_value) +{ + struct indexed_tree_path_part *p = &itp->itp_parts[part_key]; + + if (part_set_fn[part_key](&p->itp_part_value, part_value)) { + p->itp_has_part_value = true; + return true; + } + return false; +} + +bool +itp_idx_set(struct dv_indexed_tree_path *itp, enum path_parts part_key, uint32_t idx) +{ + struct indexed_tree_path_part *p = &itp->itp_parts[part_key]; + + if (idx != INVALID_IDX) { + p->itp_has_part_idx = true; + p->itp_part_idx = idx; + + return true; + } + + return false; +} + +static bool +itp_set(struct dv_indexed_tree_path *itp, enum path_parts part_key, void *part_value, + uint32_t part_idx) +{ + int i; + + /* Make sure everything before this part is already set */ + for (i = 0; i < part_key; ++i) { + if (!(itp->itp_parts[i].itp_has_part_value && itp->itp_parts[i].itp_has_part_idx)) + return false; + } + + return itp_idx_set(itp, part_key, part_idx) && + itp_part_value_set(itp, part_key, part_value); +} + +bool +itp_set_cont(struct dv_indexed_tree_path *itp, uuid_t cont_uuid, uint32_t idx) +{ + return itp_set(itp, PATH_PART_CONT, cont_uuid, idx); +} + +bool +itp_set_cont_idx(struct dv_indexed_tree_path *itp, uint32_t idx) +{ + return itp_idx_set(itp, PATH_PART_CONT, idx); +} + +bool +itp_set_cont_part_value(struct dv_indexed_tree_path *itp, unsigned char *cont_uuid) +{ + return itp_part_value_set(itp, PATH_PART_CONT, cont_uuid); +} + +bool +itp_set_obj(struct 
dv_indexed_tree_path *itp, daos_unit_oid_t oid, uint32_t idx) +{ + return itp_set(itp, PATH_PART_OBJ, &oid, idx); +} + +bool +itp_set_obj_part_value(struct dv_indexed_tree_path *itp, daos_unit_oid_t oid) +{ + return itp_part_value_set(itp, PATH_PART_OBJ, &oid); +} + +bool +itp_set_dkey(struct dv_indexed_tree_path *itp, daos_key_t *key, uint32_t idx) +{ + return itp_set(itp, PATH_PART_DKEY, key, idx); +} + +bool +itp_set_dkey_part_value(struct dv_indexed_tree_path *itp, daos_key_t *key) +{ + return itp_part_value_set(itp, PATH_PART_DKEY, key); +} + +bool +itp_set_akey(struct dv_indexed_tree_path *itp, daos_key_t *key, uint32_t idx) +{ + return itp_set(itp, PATH_PART_AKEY, key, idx); +} + +bool +itp_set_akey_part_value(struct dv_indexed_tree_path *itp, daos_key_t *key) +{ + return itp_part_value_set(itp, PATH_PART_AKEY, key); +} + +bool +itp_set_recx(struct dv_indexed_tree_path *itp, daos_recx_t *recx, uint32_t idx) +{ + return itp_set(itp, PATH_PART_RECX, recx, idx); +} + +bool +itp_set_recx_part_value(struct dv_indexed_tree_path *itp, daos_recx_t *recx) +{ + return itp_part_value_set(itp, PATH_PART_RECX, recx); +} + +void +unset_path_part(struct indexed_tree_path_part *part) +{ + part->itp_has_part_value = part->itp_has_part_idx = false; + memset(&part->itp_part_value, 0, sizeof(part->itp_part_value)); +} + +void +itp_unset_recx(struct dv_indexed_tree_path *itp) +{ + unset_path_part(&itp->itp_parts[PATH_PART_RECX]); +} + + +void +itp_unset_akey(struct dv_indexed_tree_path *itp) +{ + if (itp->itp_parts[PATH_PART_AKEY].itp_has_part_value) + daos_iov_free(&itp->itp_parts[PATH_PART_AKEY].itp_part_value.itp_key); + unset_path_part(&itp->itp_parts[PATH_PART_AKEY]); + itp_unset_recx(itp); +} + +void +itp_unset_dkey(struct dv_indexed_tree_path *itp) +{ + if (itp->itp_parts[PATH_PART_DKEY].itp_has_part_value) + daos_iov_free(&itp->itp_parts[PATH_PART_DKEY].itp_part_value.itp_key); + unset_path_part(&itp->itp_parts[PATH_PART_DKEY]); + itp_unset_akey(itp); +} + +void 
+itp_unset_obj(struct dv_indexed_tree_path *itp) +{ + unset_path_part(&itp->itp_parts[PATH_PART_OBJ]); + itp_unset_dkey(itp); +} + +void +itp_unset_cont(struct dv_indexed_tree_path *itp) +{ + unset_path_part(&itp->itp_parts[PATH_PART_CONT]); + itp_unset_obj(itp); +} + +int +itp_idx(struct dv_indexed_tree_path *itp, enum path_parts part_key) +{ + return itp->itp_parts[part_key].itp_part_idx; +} + +bool +itp_has_complete(struct dv_indexed_tree_path *itp, enum path_parts part_key) +{ + return itp->itp_parts[part_key].itp_has_part_value && + itp->itp_parts[part_key].itp_has_part_idx; +} + +bool +itp_has(struct dv_indexed_tree_path *itp, enum path_parts part_key) +{ + return itp->itp_parts[part_key].itp_has_part_value || + itp->itp_parts[part_key].itp_has_part_idx; +} + +bool +itp_has_value(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_RECX) || + (itp_has(itp, PATH_PART_AKEY) && itp->itp_child_type == PATH_PART_SV); +} + +bool +itp_has_idx(struct dv_indexed_tree_path *itp, enum path_parts part_key) +{ + return itp->itp_parts[part_key].itp_has_part_idx; +} + +bool +itp_has_part_value(struct dv_indexed_tree_path *itp, enum path_parts part_key) +{ + return itp->itp_parts[part_key].itp_has_part_value; +} + +bool +itp_has_cont_complete(struct dv_indexed_tree_path *itp) +{ + return itp_has_complete(itp, PATH_PART_CONT); +} + +bool +itp_has_cont(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_CONT); +} + +bool +itp_has_obj_complete(struct dv_indexed_tree_path *itp) +{ + return itp_has_complete(itp, PATH_PART_OBJ); +} + +bool +itp_has_obj(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_OBJ); +} + +bool +itp_has_dkey_complete(struct dv_indexed_tree_path *itp) +{ + return itp_has_complete(itp, PATH_PART_DKEY); +} + +bool +itp_has_dkey(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_DKEY); +} + +bool +itp_has_akey_complete(struct dv_indexed_tree_path *itp) +{ + return itp_has_complete(itp, 
PATH_PART_AKEY); +} + +bool +itp_has_akey(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_AKEY); +} + +bool +itp_has_recx_complete(struct dv_indexed_tree_path *itp) +{ + return itp_has_complete(itp, PATH_PART_RECX); +} + +bool +itp_has_recx(struct dv_indexed_tree_path *itp) +{ + return itp_has(itp, PATH_PART_RECX); +} + +int +itp_verify(struct dv_indexed_tree_path *itp) +{ + enum path_parts i; + uint32_t path_part_to_error[] = { + /* Must match the ordering of enum path_parts */ + DDBER_INVALID_CONT, + DDBER_INVALID_OBJ, + DDBER_INVALID_DKEY, + DDBER_INVALID_AKEY, + DDBER_INVALID_RECX, + }; + + for (i = PATH_PART_CONT; i < PATH_PART_END - 1; ++i) { /* -1 because SV not included */ + if (itp->itp_parts[i].itp_has_part_idx != itp->itp_parts[i].itp_has_part_value) + return -path_part_to_error[i]; + } + + return 0; +} + +/* Functions for getting parts */ +static union itp_part_type * +itp_value(struct dv_indexed_tree_path *itp, enum path_parts path_key) +{ + return &itp->itp_parts[path_key].itp_part_value; +} + +uint8_t * +itp_cont(struct dv_indexed_tree_path *itp) +{ + return itp_value(itp, PATH_PART_CONT)->itp_uuid; +} + +daos_unit_oid_t * +itp_oid(struct dv_indexed_tree_path *itp) +{ + return &itp_value(itp, PATH_PART_OBJ)->itp_oid; +} + +daos_key_t * +itp_dkey(struct dv_indexed_tree_path *itp) +{ + return &itp_value(itp, PATH_PART_DKEY)->itp_key; +} + +daos_key_t * +itp_akey(struct dv_indexed_tree_path *itp) +{ + return &itp_value(itp, PATH_PART_AKEY)->itp_key; +} + +daos_recx_t * +itp_recx(struct dv_indexed_tree_path *itp) +{ + return &itp_value(itp, PATH_PART_RECX)->itp_recx; +} + +int +itp_cont_idx(struct dv_indexed_tree_path *itp) +{ + return itp_idx(itp, PATH_PART_CONT); +} + +int +itp_obj_idx(struct dv_indexed_tree_path *itp) +{ + return itp_idx(itp, PATH_PART_OBJ); +} + +int +itp_dkey_idx(struct dv_indexed_tree_path *itp) +{ + return itp_idx(itp, PATH_PART_DKEY); +} + +int +itp_akey_idx(struct dv_indexed_tree_path *itp) +{ + return 
itp_idx(itp, PATH_PART_AKEY); +} + +int +itp_recx_idx(struct dv_indexed_tree_path *itp) +{ + return itp_idx(itp, PATH_PART_RECX); +} + +/* dv_tree_path Functions */ +void +itp_to_vos_path(struct dv_indexed_tree_path *itp, struct dv_tree_path *result) +{ + memset(result, 0, sizeof(*result)); + + if (itp_has_part_value(itp, PATH_PART_CONT)) + uuid_copy(result->vtp_cont, itp_cont(itp)); + if (itp_has_part_value(itp, PATH_PART_OBJ)) + result->vtp_oid = *itp_oid(itp); + + if (itp_has_part_value(itp, PATH_PART_DKEY)) + result->vtp_dkey = *itp_dkey(itp); + if (itp_has_part_value(itp, PATH_PART_AKEY)) { + result->vtp_is_recx = itp->itp_child_type == PATH_PART_RECX; + result->vtp_akey = *itp_akey(itp); + } + if (itp_has_part_value(itp, PATH_PART_RECX)) { + result->vtp_recx = *itp_recx(itp); + result->vtp_is_recx = true; + } +} + +bool +dv_has_cont(struct dv_tree_path *vtp) +{ + return !uuid_is_null(vtp->vtp_cont); +} + +bool +dv_has_obj(struct dv_tree_path *vtp) +{ + return !(vtp->vtp_oid.id_pub.lo == 0 && + vtp->vtp_oid.id_pub.hi == 0); +} + +bool +dv_has_dkey(struct dv_tree_path *vtp) +{ + return vtp->vtp_dkey.iov_len > 0; +} + +bool +dv_has_akey(struct dv_tree_path *vtp) +{ + return vtp->vtp_akey.iov_len > 0; +} + +bool +dv_has_recx(struct dv_tree_path *vtp) +{ + return vtp->vtp_recx.rx_nr > 0; +} + +bool +dvp_is_complete(struct dv_tree_path *vtp) +{ + return dv_has_cont(vtp) && dv_has_obj(vtp) && dv_has_dkey(vtp) && dv_has_akey(vtp); +} + +bool +dvp_is_empty(struct dv_tree_path *vtp) +{ + return !dv_has_cont(vtp) && !dv_has_obj(vtp) && !dv_has_dkey(vtp) && !dv_has_akey(vtp); +} + +/* + * --------------------------------------------------- + * Functions for printing the path + * --------------------------------------------------- + */ + +void +itp_print_part_cont(struct ddb_ctx *ctx, union itp_part_type *v) +{ + ddb_printf(ctx, DF_UUIDF, DP_UUID(v->itp_uuid)); +} + +void +itp_print_part_obj(struct ddb_ctx *ctx, union itp_part_type *v) +{ + ddb_printf(ctx, DF_UOID, 
DP_UOID(v->itp_oid));
+}
+
+/* Return true when c is one of the characters that is escaped in a printed key. */
+static bool
+itp_key_char_needs_escape(char c)
+{
+	switch (c) {
+	case '/':
+	case '{':
+	case '}':
+	case '\\':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Escape, in place, the special characters ('/', '{', '}' and '\') of the string held in buf
+ * by prepending a backslash to each of them.
+ *
+ * buf		[in,out] string to escape; at most buf_len bytes are examined.
+ * buf_len	total size of buf in bytes.
+ *
+ * Returns true on success (an empty string is trivially a success). On success every byte of
+ * buf after the escaped string is set to 0. Returns false, leaving buf untouched, when the
+ * buffer is considered too small.
+ *
+ * No temporary copy is made: the string is expanded from its end towards its start, so the
+ * function needs no stack space proportional to buf_len.
+ */
+bool
+itp_key_safe_str(char *buf, size_t buf_len)
+{
+	size_t	len;
+	size_t	prefix_len;	/* escaped length of everything but the last character */
+	size_t	out_len;
+	size_t	src;
+	size_t	dst;
+	size_t	i;
+
+	/* strnlen() stops at buf_len, so a zero sized buffer is handled as an empty string */
+	len = strnlen(buf, buf_len);
+	if (len == 0)
+		return true;
+
+	prefix_len = len - 1;
+	for (i = 0; i + 1 < len; ++i) {
+		if (itp_key_char_needs_escape(buf[i]))
+			prefix_len++;
+	}
+
+	/*
+	 * Before the last character is emitted there must be room for it, for an escape
+	 * character (needed or not) and for the terminator. This also rejects a buffer that
+	 * has no terminator within buf_len.
+	 */
+	if (prefix_len + 2 >= buf_len) {
+		D_ERROR("Buffer was too small to hold the escape characters");
+		return false;
+	}
+
+	out_len = prefix_len + 1;
+	if (itp_key_char_needs_escape(buf[len - 1]))
+		out_len++;
+
+	/* dst never falls behind src, so walking backwards never overwrites unread input */
+	src = len;
+	dst = out_len;
+	while (src > 0) {
+		char c = buf[--src];
+
+		buf[--dst] = c;
+		if (itp_key_char_needs_escape(c))
+			buf[--dst] = '\\';
+	}
+
+	/* terminate the string and clear the remainder of the buffer */
+	memset(buf + out_len, 0, buf_len - out_len);
+
+	return true;
+}
+
+/*
+ * Print a key (dkey or akey) path part. The key is first rendered into a local buffer; when
+ * ddb_can_print() accepts the key, the rendered text is escaped with itp_key_safe_str()
+ * before being printed.
+ */
+void
+itp_print_part_key(struct ddb_ctx *ctx, union itp_part_type *key_part)
+{
+	char buf[DDB_MAX_PRITABLE_KEY];
+	d_iov_t *key_iov = &key_part->itp_key;
+
+	ddb_iov_to_printable_buf(key_iov, buf, ARRAY_SIZE(buf));
+	if (ddb_can_print(key_iov)) {
+		/* +1 to make sure there's room for a null terminator */
+		char key_str[key_part->itp_key.iov_len + 1];
+
+		memcpy(key_str, key_iov->iov_buf, key_iov->iov_len);
+		key_str[key_iov->iov_len] = '\0';
+		/* buffer should be plenty big, but just in case ... */
+		if (!itp_key_safe_str(buf, ARRAY_SIZE(buf))) {
+			ddb_print(ctx, "(ISSUE PRINTING KEY)");
+			return;
+		}
+		/* print the size with the string key if the size isn't strlen.
That way + * parsing the string into a valid key will work + */ + if (key_iov->iov_len != strlen(key_str)) + ddb_printf(ctx, "%s{%lu}", buf, key_iov->iov_len); + else + ddb_printf(ctx, "%s", buf); + } else { + /* is an int or binary and already formatted in iov_to_pritable_buf */ + ddb_printf(ctx, "{%s}", buf); + } +} + +void +itp_print_part_recx(struct ddb_ctx *ctx, union itp_part_type *v) +{ + ddb_printf(ctx, DF_DDB_RECX, DP_DDB_RECX(v->itp_recx)); +} + +static void (*print_fn[PATH_PART_END])(struct ddb_ctx *ctx, union itp_part_type *v) = { + itp_print_part_cont, + itp_print_part_obj, + itp_print_part_key, + itp_print_part_key, + itp_print_part_recx, +}; + +void +itp_print_parts(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp) +{ + int i; + + if (!itp->itp_parts[PATH_PART_CONT].itp_has_part_value) { + ddb_print(ctx, "/"); + return; + } + + for (i = 0; i < PATH_PART_END; i++) { + if (!itp->itp_parts[i].itp_has_part_value) + break; + ddb_print(ctx, "/"); + print_fn[i](ctx, &itp->itp_parts[i].itp_part_value); + } +} + +void +itp_print_indexes(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp) +{ + int i; + + for (i = 0; i < PATH_PART_END; i++) { + if (!itp->itp_parts[i].itp_has_part_idx) + return; + ddb_printf(ctx, "/"DF_IDX, DP_IDX(itp->itp_parts[i].itp_part_idx)); + } +} + +void +itp_print_full(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp) +{ + char part_name[][PATH_PART_END] = { + "CONT", + "OBJ", + "DKEY", + "AKEY", + "RECX" + }; + int i; + int part_set = -1; + + D_ASSERT(itp != NULL); + + for (i = 0; i < PATH_PART_END; ++i) { + if (itp->itp_parts[i].itp_has_part_idx != itp->itp_parts[i].itp_has_part_value) { + ddb_print(ctx, INVALID_PATH); + return; + } else if (itp->itp_parts[i].itp_has_part_idx) { + part_set++; + } + } + + /* nothing in path */ + if (part_set == -1) { + ddb_print(ctx, "/"); + return; + } + + ddb_printf(ctx, "%s: ", part_name[part_set]); + ddb_print(ctx, "("); + itp_print_indexes(ctx, itp); + ddb_print(ctx, ") "); + + 
itp_print_parts(ctx, itp); +} + +void +itp_copy(struct dv_indexed_tree_path *src, struct dv_indexed_tree_path *dst) +{ + if (src == NULL || dst == NULL) + return; + *src = *dst; + itp_set_cont(dst, itp_cont(src), itp_cont_idx(src)); + itp_set_obj(dst, *itp_oid(src), itp_obj_idx(src)); + itp_set_dkey(dst, itp_dkey(src), itp_dkey_idx(src)); + itp_set_akey(dst, itp_akey(src), itp_akey_idx(src)); + itp_set_recx(dst, itp_recx(src), itp_recx_idx(src)); +} + +/* If any memory was allocated for the path structure, free it */ +void +itp_free(struct dv_indexed_tree_path *itp) +{ + itp_unset_dkey(itp); + itp_unset_akey(itp); + + memset(itp, 0, sizeof(*itp)); +} + +static const char * const path_type[] = { + "", + "Container", + "Object", + "DKEY", + "AKEY", + "RECX", +}; + +int +itp_handle_path_parse_error(struct ddb_ctx *ctx, int rc) +{ + if (!(-rc >= DDBER_INVALID_UNKNOWN && -rc <= DDBER_INCOMPLETE_PATH_VALUE)) + return rc; + + rc = -rc; + if (rc == DDBER_INVALID_CONT || rc == DDBER_INVALID_OBJ || rc == DDBER_INVALID_DKEY || + rc == DDBER_INVALID_AKEY || rc == DDBER_INVALID_RECX) { + + ddb_printf(ctx, "%s is invalid\n", path_type[rc - ERROR_BASE]); + } else if (rc == DDBER_INCOMPLETE_PATH_VALUE) { + ddb_print(ctx, "Incomplete Path. Value needed.\n"); + } else { + ddb_print(ctx, "Unknown error parsing the path.\n"); + } + + return -DER_INVAL; +} diff --git a/src/ddb/ddb_tree_path.h b/src/ddb/ddb_tree_path.h new file mode 100644 index 00000000000..af8c3d92dcf --- /dev/null +++ b/src/ddb/ddb_tree_path.h @@ -0,0 +1,204 @@ +/** + * (C) Copyright 2023 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef DAOS_DDB_TREE_PATH_H +#define DAOS_DDB_TREE_PATH_H + +#include "ddb_common.h" + +#define DF_DDB_RECX "{"DF_U64"-"DF_U64"}" +#define DP_DDB_RECX(r) (r).rx_idx, ((r).rx_idx + (r).rx_nr - 1) + +#define INVALID_IDX (-1) +#define INVALID_PATH "INVALID PATH" +#define DDB_MAX_PRITABLE_KEY 1024 + +#define ERROR_BASE 5000 +enum ddb_parse_error { + DDBER_INVALID_UNKNOWN = ERROR_BASE + 0, + DDBER_INVALID_CONT = ERROR_BASE + 1, + DDBER_INVALID_OBJ = ERROR_BASE + 2, + DDBER_INVALID_DKEY = ERROR_BASE + 3, + DDBER_INVALID_AKEY = ERROR_BASE + 4, + DDBER_INVALID_RECX = ERROR_BASE + 5, + DDBER_INCOMPLETE_PATH_VALUE = ERROR_BASE + 6, +}; + +int itp_handle_path_parse_error(struct ddb_ctx *ctx, int rc); + +enum path_parts { + PATH_PART_CONT = 0, + PATH_PART_OBJ = 1, + PATH_PART_DKEY = 2, + PATH_PART_AKEY = 3, + PATH_PART_RECX = 4, + PATH_PART_SV = 5, + PATH_PART_END = 6 +}; + +/* + * VOS paths have multiple parts (container, object, dkey, akey, recx) and each part has 2 pieces, + * its 'value' (i.e. container uuid, object id, etc) and an index. The indexed_tree_path_part + * structure stores the part's value and index, while the dv_indexed_tree_path structure contains + * all path parts for a VOS path. + */ +struct indexed_tree_path_part { + union itp_part_type { + uuid_t itp_uuid; + daos_unit_oid_t itp_oid; + daos_key_t itp_key; /* akey or dkey */ + daos_recx_t itp_recx; + } itp_part_value; + uint32_t itp_part_idx; + bool itp_has_part_idx; + bool itp_has_part_value; +}; + +struct dv_indexed_tree_path { + struct indexed_tree_path_part itp_parts[PATH_PART_END]; + enum path_parts itp_child_type; +}; + +/** + * Parse string input to a structured path directing to a given node in a VOS tree. The format of + * the path should be VOS path parts separated by a forward slash ('/'), starting with a + * container to the depth desired. 
The path parts can be the unique identifier for the part or an
+ * index (as provided by the list command). Path parts include:
+ *	Container:	full uuid (uuid_t as formatted by DF_UUIDF)
+ *	Object Id:	full unit object ID (daos_unit_oid_t as formatted by DF_UOID)
+ *	D Key:		string representation of the key
+ *	A Key:		string representation of the key
+ *	RECX:		start idx - end idx. This is different from how a recx might be printed
+ *			in log files (using the DF_RECX format). DF_DDB_RECX is used instead and
+ *			looks like: {lo-hi}
+ * @param path	input path
+ * @param itp	output structure path
+ * @return	0 if success, else error
+ */
+int itp_parse(const char *path, struct dv_indexed_tree_path *itp);
+
+/* Deep copy of the path (src is the path to copy, dst receives the copy) */
+void itp_copy(struct dv_indexed_tree_path *src, struct dv_indexed_tree_path *dst);
+
+/* Free any memory that was allocated for the path structures */
+void itp_free(struct dv_indexed_tree_path *itp);
+
+/*
+ * Generic functions for setting the path parts. Both return true when the value or index was
+ * stored and false when it was rejected (for itp_idx_set, when idx is INVALID_IDX).
+ */
+bool
+itp_part_value_set(struct dv_indexed_tree_path *itp, enum path_parts part_key, void *part_value);
+bool
+itp_idx_set(struct dv_indexed_tree_path *itp, enum path_parts part_key, uint32_t idx);
+
+/* Functions for setting parts as a specific path part (i.e. container, object, ...) */
+bool itp_part_set_cont(union itp_part_type *part, void *part_value);
+bool itp_part_set_obj(union itp_part_type *part, void *part_value);
+bool itp_part_set_key(union itp_part_type *part, void *part_value);
+bool itp_part_set_recx(union itp_part_type *part, void *part_value);
+
+/* Functions for setting the parts (cont, obj, ...)
of a indexed tree path */ +bool itp_set_cont(struct dv_indexed_tree_path *itp, uuid_t cont_uuid, uint32_t idx); +bool itp_set_cont_idx(struct dv_indexed_tree_path *itp, uint32_t idx); +bool itp_set_cont_part_value(struct dv_indexed_tree_path *itp, unsigned char *cont_uuid); +bool itp_set_obj(struct dv_indexed_tree_path *itp, daos_unit_oid_t oid, uint32_t idx); +bool itp_set_obj_part_value(struct dv_indexed_tree_path *itp, daos_unit_oid_t oid); +bool itp_set_dkey(struct dv_indexed_tree_path *itp, daos_key_t *key, uint32_t idx); +bool itp_set_dkey_part_value(struct dv_indexed_tree_path *itp, daos_key_t *key); +bool itp_set_akey(struct dv_indexed_tree_path *itp, daos_key_t *key, uint32_t idx); +bool itp_set_akey_part_value(struct dv_indexed_tree_path *itp, daos_key_t *key); +bool itp_set_recx(struct dv_indexed_tree_path *itp, daos_recx_t *recx, uint32_t idx); +bool itp_set_recx_part_value(struct dv_indexed_tree_path *itp, daos_recx_t *recx); + +void itp_unset_recx(struct dv_indexed_tree_path *itp); +void itp_unset_akey(struct dv_indexed_tree_path *itp); +void itp_unset_dkey(struct dv_indexed_tree_path *itp); +void itp_unset_obj(struct dv_indexed_tree_path *itp); +void itp_unset_cont(struct dv_indexed_tree_path *itp); + +/* Get the part's index */ +int +itp_idx(struct dv_indexed_tree_path *itp, enum path_parts part_key); + +/* path part has both index and part_value */ +bool +itp_has_complete(struct dv_indexed_tree_path *itp, enum path_parts part_key); + +/* path part has either index or part_value */ +bool +itp_has(struct dv_indexed_tree_path *itp, enum path_parts part_key); + +/* path part has an index */ +bool +itp_has_idx(struct dv_indexed_tree_path *itp, enum path_parts part_key); + +/* path part has a part value */ +bool + itp_has_part_value(struct dv_indexed_tree_path *itp, enum path_parts part_key); + +/* Have specific complete part or partial part */ +bool itp_has_cont_complete(struct dv_indexed_tree_path *itp); +bool itp_has_cont(struct dv_indexed_tree_path 
*itp); +bool itp_has_obj_complete(struct dv_indexed_tree_path *itp); +bool itp_has_obj(struct dv_indexed_tree_path *itp); +bool itp_has_dkey_complete(struct dv_indexed_tree_path *itp); +bool itp_has_dkey(struct dv_indexed_tree_path *itp); +bool itp_has_akey_complete(struct dv_indexed_tree_path *itp); +bool itp_has_akey(struct dv_indexed_tree_path *itp); +bool itp_has_recx_complete(struct dv_indexed_tree_path *itp); +bool itp_has_recx(struct dv_indexed_tree_path *itp); +int itp_verify(struct dv_indexed_tree_path *itp); + +/* path is complete to a value (array or single value) */ +bool itp_has_value(struct dv_indexed_tree_path *itp); + +/* Functions for getting specific parts' part_values */ +uint8_t *itp_cont(struct dv_indexed_tree_path *itp); +daos_unit_oid_t *itp_oid(struct dv_indexed_tree_path *itp); +daos_key_t *itp_dkey(struct dv_indexed_tree_path *itp); +daos_key_t *itp_akey(struct dv_indexed_tree_path *itp); +daos_recx_t *itp_recx(struct dv_indexed_tree_path *itp); + +/* Functions for getting specific parts' index */ +int itp_cont_idx(struct dv_indexed_tree_path *itp); +int itp_obj_idx(struct dv_indexed_tree_path *itp); +int itp_dkey_idx(struct dv_indexed_tree_path *itp); +int itp_akey_idx(struct dv_indexed_tree_path *itp); +int itp_recx_idx(struct dv_indexed_tree_path *itp); + +/* Printing functions */ +void itp_print_indexes(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp); +void itp_print_parts(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp); +void itp_print_full(struct ddb_ctx *ctx, struct dv_indexed_tree_path *itp); +void itp_print_part_key(struct ddb_ctx *ctx, union itp_part_type *key_part); + +/* + * This function is used when printing keys. It checks each character in the buffer and will + * prepend the escape character ('\') before special characters (example '{', '}', ...). This way, + * printed keys can be used (copy/pasted) directly in VOS paths. + */ +bool itp_key_safe_str(char *buf, size_t buf_len); + +/* + * Tree Path. 
Simplified version of itp + */ +struct dv_tree_path { + uuid_t vtp_cont; + daos_unit_oid_t vtp_oid; + daos_key_t vtp_dkey; + daos_key_t vtp_akey; + daos_recx_t vtp_recx; + bool vtp_is_recx; +}; + +void itp_to_vos_path(struct dv_indexed_tree_path *itp, struct dv_tree_path *result); +bool dv_has_cont(struct dv_tree_path *vtp); +bool dv_has_obj(struct dv_tree_path *vtp); +bool dv_has_dkey(struct dv_tree_path *vtp); +bool dv_has_akey(struct dv_tree_path *vtp); +bool dv_has_recx(struct dv_tree_path *vtp); +bool dvp_is_complete(struct dv_tree_path *vtp); +bool dvp_is_empty(struct dv_tree_path *vtp); + +#endif /* DAOS_DDB_TREE_PATH_H */ diff --git a/src/ddb/ddb_vos.c b/src/ddb/ddb_vos.c new file mode 100644 index 00000000000..520ca4d561f --- /dev/null +++ b/src/ddb/ddb_vos.c @@ -0,0 +1,1855 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include +#include "ddb_common.h" +#include "ddb_parse.h" +#include "ddb_vos.h" +#include "ddb_spdk.h" +#define ddb_vos_iterate(param, iter_type, recursive, anchors, cb, args) \ + vos_iterate(param, iter_type, recursive, \ + anchors, cb, NULL, args, NULL) + +int +dv_pool_open(char *path, daos_handle_t *poh) +{ + struct vos_file_parts path_parts = {0}; + uint32_t flags = 0; /* Will need to be a flag to ignore uuid check */ + int rc; + + /* + * Currently the vos file is required to be in the same path daos_engine created it in. + * This is so that the sys_db file exists and the pool uuid and target id can be obtained + * from the path. It should be considered in the future how to get these from another + * source. 
+ */ + rc = vos_path_parse(path, &path_parts); + if (!SUCCESS(rc)) + return rc; + + rc = vos_self_init(path_parts.vf_db_path, true, path_parts.vf_target_idx); + if (!SUCCESS(rc)) { + D_ERROR("Failed to initialize VOS with path '%s': "DF_RC"\n", + path_parts.vf_db_path, DP_RC(rc)); + return rc; + } + + rc = vos_pool_open(path, path_parts.vf_pool_uuid, flags, poh); + if (!SUCCESS(rc)) { + D_ERROR("Failed to open pool: "DF_RC"\n", DP_RC(rc)); + vos_self_fini(); + } + + return rc; +} + +int +dv_pool_close(daos_handle_t poh) +{ + int rc; + + rc = vos_pool_close(poh); + vos_self_fini(); + + return rc; +} + +int +dv_cont_open(daos_handle_t poh, uuid_t uuid, daos_handle_t *coh) +{ + return vos_cont_open(poh, uuid, coh); +} + +int +dv_cont_close(daos_handle_t *coh) +{ + int rc; + + D_ASSERT(coh); + if (daos_handle_is_inval(*coh)) + return 0; + + rc = vos_cont_close(*coh); + + *coh = DAOS_HDL_INVAL; + + return rc; +} + +struct search_args { + uint32_t sa_idx; + uint32_t sa_current; + uuid_t sa_uuid; + daos_unit_oid_t sa_uoid; + daos_key_t sa_key; + daos_recx_t sa_recx; +}; + +static int +get_by_idx_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + struct search_args *args = cb_arg; + + /* not found yet */ + if (args->sa_idx != args->sa_current) { + args->sa_current++; + return 0; + } + + switch (type) { + case VOS_ITER_COUUID: + uuid_copy(args->sa_uuid, entry->ie_couuid); + break; + case VOS_ITER_OBJ: + args->sa_uoid = entry->ie_oid; + break; + case VOS_ITER_DKEY: + args->sa_key = entry->ie_key; + break; + case VOS_ITER_AKEY: + args->sa_key = entry->ie_key; + break; + case VOS_ITER_SINGLE: + break; + case VOS_ITER_RECX: + args->sa_recx = entry->ie_orig_recx; + break; + case VOS_ITER_DTX: + break; + case VOS_ITER_NONE: + break; + } + + return 1; +} + +static int +get_by_idx(daos_handle_t hdl, uint32_t idx, struct search_args *args, daos_unit_oid_t *uoid, + daos_key_t *dkey, daos_key_t *akey, 
vos_iter_type_t type)
+{
+	vos_iter_param_t param = {0};
+	struct vos_iter_anchors anchors = {0};
+	int rc;
+	bool found;
+
+	args->sa_idx = idx;
+
+	param.ip_hdl = hdl;
+	param.ip_epr.epr_hi = DAOS_EPOCH_MAX;
+	/* only the parents that were supplied narrow the iteration */
+	if (uoid)
+		param.ip_oid = *uoid;
+	if (dkey)
+		param.ip_dkey = *dkey;
+	if (akey)
+		param.ip_akey = *akey;
+	rc = vos_iterate(&param, type, false, &anchors, get_by_idx_cb, NULL, args, NULL);
+	if (rc < 0)
+		return rc;
+
+	/* a result of 1 is treated as "the callback stopped on the requested index" */
+	found = rc == 1;
+	if (!found)
+		return -DER_NONEXIST;
+
+	return 0;
+}
+
+/*
+ * Look up the container at position idx in the pool and copy its uuid into uuid.
+ * Returns the get_by_idx() result; uuid is only written on success.
+ */
+int
+dv_get_cont_uuid(daos_handle_t poh, uint32_t idx, uuid_t uuid)
+{
+	struct search_args args = {0};
+	int rc;
+
+	rc = get_by_idx(poh, idx, &args, NULL, NULL, NULL, VOS_ITER_COUUID);
+	if (SUCCESS(rc))
+		uuid_copy(uuid, args.sa_uuid);
+	return rc;
+}
+
+/*
+ * Container iteration callback: returns 1 when the entry matches the uuid stored in the
+ * search arguments, otherwise counts the entry in sa_idx and returns 0.
+ */
+static int
+get_cont_idx_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type,
+		vos_iter_param_t *param, void *cb_arg, unsigned int *acts)
+{
+	struct search_args *args = cb_arg;
+
+	D_ASSERT(type == VOS_ITER_COUUID);
+	if (uuid_compare(args->sa_uuid, entry->ie_couuid) == 0) {
+		/* found */
+		return 1;
+	}
+	args->sa_idx++;
+
+	return 0;
+}
+
+/*
+ * Find the position of the container identified by uuid within the pool.
+ *
+ * Returns the index (>= 0) of the container, -DDBER_INVALID_CONT when no container matched,
+ * or the negative value reported by vos_iterate() when the iteration itself failed. A failed
+ * iteration is never reported as a valid index.
+ */
+int
+dv_get_cont_idx(daos_handle_t poh, uuid_t uuid)
+{
+	struct search_args args = {0};
+	vos_iter_param_t param = {0};
+	struct vos_iter_anchors anchors = {0};
+	int rc;
+
+	uuid_copy(args.sa_uuid, uuid);
+	param.ip_hdl = poh;
+	param.ip_epr.epr_hi = DAOS_EPOCH_MAX;
+	rc = vos_iterate(&param, VOS_ITER_COUUID, false, &anchors, get_cont_idx_cb, NULL,
+			 &args, NULL);
+	if (rc < 0)
+		return rc;
+	if (rc > 0)
+		return args.sa_idx;
+	return -DDBER_INVALID_CONT;
+}
+
+/*
+ * Look up the object at position idx in the container and store its id in uoid.
+ * Returns -DER_INVAL for an invalid container handle, otherwise the get_by_idx() result.
+ */
+int
+dv_get_object_oid(daos_handle_t coh, uint32_t idx, daos_unit_oid_t *uoid)
+{
+	struct search_args args = {0};
+	int rc;
+
+	D_ASSERT(uoid != NULL);
+	if (daos_handle_is_inval(coh))
+		return -DER_INVAL;
+
+	rc = get_by_idx(coh, idx, &args, NULL, NULL, NULL, VOS_ITER_OBJ);
+	if (SUCCESS(rc))
+		*uoid = args.sa_uoid;
+
+	return rc;
+}
+
+int
+dv_get_dkey(daos_handle_t coh, daos_unit_oid_t uoid, uint32_t idx, daos_key_t *dkey)
+{ + struct search_args args = {0}; + int rc; + + D_ASSERT(dkey != NULL); + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(uoid)) + return -DER_INVAL; + + rc = get_by_idx(coh, idx, &args, &uoid, NULL, NULL, VOS_ITER_DKEY); + if (SUCCESS(rc)) + daos_iov_copy(dkey, &args.sa_key); + + return rc; +} + +int +dv_get_akey(daos_handle_t coh, daos_unit_oid_t uoid, daos_key_t *dkey, uint32_t idx, + daos_key_t *akey) +{ + struct search_args args = {0}; + int rc; + + D_ASSERT(dkey != NULL); + D_ASSERT(akey != NULL); + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(uoid)) + return -DER_INVAL; + + rc = get_by_idx(coh, idx, &args, &uoid, dkey, NULL, VOS_ITER_AKEY); + if (SUCCESS(rc)) + daos_iov_copy(akey, &args.sa_key); + + return rc; +} + +int +dv_get_recx(daos_handle_t coh, daos_unit_oid_t uoid, daos_key_t *dkey, daos_key_t *akey, + uint32_t idx, daos_recx_t *recx) +{ + struct search_args args = {0}; + int rc; + + D_ASSERT(dkey != NULL); + D_ASSERT(akey != NULL); + D_ASSERT(recx != NULL); + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(uoid)) + return -DER_INVAL; + + rc = get_by_idx(coh, idx, &args, &uoid, dkey, akey, VOS_ITER_RECX); + if (SUCCESS(rc)) + *recx = args.sa_recx; + + return rc; +} + +#define daos_recx_match(a, b) ((a).rx_idx == (b.rx_idx) && (a).rx_nr == (b).rx_nr) + +struct path_verify_args { + struct dv_indexed_tree_path *pva_itp; + uint32_t pva_current_idx; + uint32_t pva_current_idxs[PATH_PART_END]; +}; + +static bool +compare_oid(vos_iter_entry_t *entry, union itp_part_type *part) +{ + return daos_unit_oid_compare(part->itp_oid, entry->ie_oid) == 0; +} + +static bool +compare_key(vos_iter_entry_t *entry, union itp_part_type *part) +{ + return daos_key_match(&part->itp_key, &entry->ie_key); +} + +static bool +compare_recx(vos_iter_entry_t *entry, union itp_part_type *part) +{ + return daos_recx_match(part->itp_recx, entry->ie_orig_recx); +} + +static bool +vos_vtp_compare(struct dv_indexed_tree_path *vtp, vos_iter_entry_t *entry, enum 
path_parts part_key) +{ + bool (*cmp_fn[PATH_PART_END])(vos_iter_entry_t *entry, union itp_part_type *part) = { + NULL, /* Won't be comparing containers */ + compare_oid, + compare_key, + compare_key, + compare_recx, + }; + + D_ASSERT(part_key < PATH_PART_END); + D_ASSERT(cmp_fn[part_key] != NULL); + + return cmp_fn[part_key](entry, &vtp->itp_parts[part_key].itp_part_value); +} + +static void +set_oid(vos_iter_entry_t *entry, union itp_part_type *part) +{ + itp_part_set_obj(part, &entry->ie_oid); +} + +static void +set_key(vos_iter_entry_t *entry, union itp_part_type *part) +{ + itp_part_set_key(part, &entry->ie_key); +} + +static void +set_recx(vos_iter_entry_t *entry, union itp_part_type *part) +{ + itp_part_set_recx(part, &entry->ie_orig_recx); +} + +static void +vos_itp_set(struct dv_indexed_tree_path *itp, vos_iter_entry_t *entry, enum path_parts part_key) +{ + void (*set_fn[PATH_PART_END])(vos_iter_entry_t *entry, union itp_part_type *part) = { + NULL, /* Won't set containers */ + set_oid, + set_key, + set_key, + set_recx, + }; + + D_ASSERT(part_key < PATH_PART_END); + D_ASSERT(set_fn[part_key] != NULL); + + set_fn[part_key](entry, &itp->itp_parts[part_key].itp_part_value); + itp->itp_parts[part_key].itp_has_part_value = true; +} + +#define VOS_ITER_LARGEST VOS_ITER_DTX + +static enum path_parts +vos_iterator_type_to_path_part(vos_iter_type_t type) +{ + int map[VOS_ITER_LARGEST] = {0}; + + map[VOS_ITER_COUUID] = PATH_PART_CONT; + map[VOS_ITER_OBJ] = PATH_PART_OBJ; + map[VOS_ITER_DKEY] = PATH_PART_DKEY; + map[VOS_ITER_AKEY] = PATH_PART_AKEY; + map[VOS_ITER_RECX] = PATH_PART_RECX; + map[VOS_ITER_SINGLE] = PATH_PART_SV; + + return map[type]; +} + +static enum path_parts +vos_enum_to_path_part(vos_iter_type_t t) +{ + enum path_parts map[VOS_ITER_LARGEST]; + + map[VOS_ITER_OBJ] = PATH_PART_OBJ; + map[VOS_ITER_DKEY] = PATH_PART_DKEY; + map[VOS_ITER_AKEY] = PATH_PART_AKEY; + map[VOS_ITER_RECX] = PATH_PART_RECX; + map[VOS_ITER_SINGLE] = PATH_PART_END; /* nothing for 
single value */ + + return map[t]; +} + +static enum path_parts +vos_enum_to_parent_path_part(vos_iter_type_t t) +{ + int map[VOS_ITER_LARGEST]; + + map[VOS_ITER_OBJ] = PATH_PART_CONT; + map[VOS_ITER_DKEY] = PATH_PART_OBJ; + map[VOS_ITER_AKEY] = PATH_PART_DKEY; + map[VOS_ITER_RECX] = PATH_PART_AKEY; + map[VOS_ITER_SINGLE] = PATH_PART_AKEY; + + return map[t]; +} + +static int +verify_path_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + struct path_verify_args *args = cb_arg; + struct dv_indexed_tree_path *itp = args->pva_itp; + + if (!(type == VOS_ITER_OBJ || + type == VOS_ITER_DKEY || + type == VOS_ITER_AKEY || + type == VOS_ITER_RECX)) + return 0; /* these are the only parts of the path */ + + if (itp_has_complete(itp, vos_enum_to_parent_path_part(type))) { + enum path_parts part_key = vos_enum_to_path_part(type); + + if (itp_has_idx(itp, part_key)) { + if (itp_idx(itp, part_key) == args->pva_current_idxs[part_key]) { + /* set the part */ + vos_itp_set(itp, entry, part_key); + itp->itp_child_type = + vos_iterator_type_to_path_part(entry->ie_child_type); + + args->pva_current_idx = 0; + } else { + /* looking for index, but not found yet */ + args->pva_current_idxs[part_key]++; + args->pva_current_idx++; + *acts = VOS_ITER_CB_SKIP; + } + } else if (itp_has_part_value(itp, part_key)) { + if (vos_vtp_compare(itp, entry, part_key)) { + /* need to verify part and capture index */ + itp_idx_set(itp, part_key, args->pva_current_idxs[part_key]); + itp->itp_child_type = + vos_iterator_type_to_path_part(entry->ie_child_type); + args->pva_current_idx = 0; + + } else { + *acts = VOS_ITER_CB_SKIP; + args->pva_current_idx++; + args->pva_current_idxs[part_key]++; + } + } + } + return 0; +} + +static int +verify_path_post_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + struct path_verify_args *args = cb_arg; 
+	struct dv_indexed_tree_path *itp = args->pva_itp;
+
+	/* once the part for this level has both its index and its value, stop iterating */
+	switch (type) {
+	case VOS_ITER_NONE:
+		break;
+	case VOS_ITER_COUUID:
+		break;
+	case VOS_ITER_OBJ:
+		if (itp_has_obj_complete(itp))
+			*acts = VOS_ITER_CB_ABORT;
+		break;
+	case VOS_ITER_DKEY:
+		if (itp_has_dkey_complete(itp))
+			*acts = VOS_ITER_CB_ABORT;
+		break;
+	case VOS_ITER_AKEY:
+		if (itp_has_akey_complete(itp))
+			*acts = VOS_ITER_CB_ABORT;
+		break;
+	case VOS_ITER_SINGLE:
+		break;
+	case VOS_ITER_RECX:
+		if (itp_has_recx_complete(itp))
+			*acts = VOS_ITER_CB_ABORT;
+		break;
+	case VOS_ITER_DTX:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Verify a path against the content of the pool and complete it, so that each part supplied
+ * with only an index or only a value ends up with both.
+ *
+ * The container part is resolved first: from its index to its uuid when an index was given,
+ * otherwise from its uuid to its index. The container is then opened and iterated
+ * recursively, starting at the object level, with the verify_path_*_cb callbacks resolving
+ * the remaining parts.
+ *
+ * Returns 0 for an empty path or when the whole path was verified. A container that does not
+ * exist is reported as -DDBER_INVALID_CONT; other failures return the negative value of the
+ * failing call or of itp_verify().
+ */
+int
+dv_path_verify(daos_handle_t poh, struct dv_indexed_tree_path *itp)
+{
+	vos_iter_param_t param = {0};
+	struct vos_iter_anchors anchors = {0};
+	struct path_verify_args args = {0};
+	daos_handle_t coh = {0};
+	int rc = 0;
+
+	/* empty path is fine */
+	if (!itp_has_cont(itp))
+		return 0;
+
+	if (itp_has_idx(itp, PATH_PART_CONT)) {
+		uuid_t uuid;
+
+		rc = dv_get_cont_uuid(poh, itp_idx(itp, PATH_PART_CONT), uuid);
+		if (!SUCCESS(rc)) {
+			D_ERROR("Unable to get container index %d\n", itp_idx(itp, PATH_PART_CONT));
+			if (rc == -DER_NONEXIST)
+				rc = -DDBER_INVALID_CONT;
+			return rc;
+		}
+		itp_set_cont_part_value(itp, uuid);
+	} else {
+		rc = dv_get_cont_idx(poh, itp_cont(itp));
+		if (rc < 0)
+			return rc;
+		itp_set_cont_idx(itp, rc);
+	}
+
+	rc = dv_cont_open(poh, itp_cont(itp), &coh);
+	if (!SUCCESS(rc)) {
+		/* DF_UUIDF must be paired with DP_UUID(), never with the raw uuid bytes */
+		D_ERROR("Unable to open container "DF_UUIDF"\n", DP_UUID(itp_cont(itp)));
+		if (rc == -DER_NONEXIST)
+			rc = -DDBER_INVALID_CONT;
+		return rc;
+	}
+
+	args.pva_current_idx = 0;
+	args.pva_itp = itp;
+
+	param.ip_hdl = coh;
+	param.ip_epr.epr_hi = DAOS_EPOCH_MAX;
+
+	rc = vos_iterate(&param, VOS_ITER_OBJ, true, &anchors,
+			 verify_path_pre_cb, verify_path_post_cb, &args, NULL);
+	/* the container is only needed for the iteration; close it before looking at rc */
+	dv_cont_close(&coh);
+	if (!SUCCESS(rc)) {
+		D_ERROR("Issue verifying path: "DF_RC"\n", DP_RC(rc));
+		return rc;
+	}
+
+	return itp_verify(itp);
+}
+
+struct ddb_iter_ctx {
+ struct dv_indexed_tree_path itp; + daos_handle_t poh; + struct vos_tree_handlers *handlers; + void *handler_args; + uuid_t current_cont; + uint32_t cont_seen; + daos_unit_oid_t current_obj; + uint32_t obj_seen; + daos_key_t current_dkey; + uint32_t dkey_seen; + daos_key_t current_akey; + uint32_t akey_seen; + uint32_t value_seen; +}; + +static int +handle_cont(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry, vos_iter_param_t *param) +{ + struct ddb_cont cont = {0}; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_cont_handler); + + itp_set_cont(&ctx->itp, entry->ie_couuid, ctx->cont_seen); + cont.ddbc_path = &ctx->itp; + itp_unset_obj(&ctx->itp); + + uuid_copy(ctx->current_cont, entry->ie_couuid); + uuid_copy(cont.ddbc_cont_uuid, entry->ie_couuid); + cont.ddbc_idx = ctx->cont_seen++; + + /* Restart object count for container */ + ctx->obj_seen = 0; + + return ctx->handlers->ddb_cont_handler(&cont, ctx->handler_args); +} + +static void +get_object_type(enum daos_otype_t type, char *type_str) +{ + switch (type) { + case DAOS_OT_MULTI_HASHED: + strcpy(type_str, "DAOS_OT_MULTI_HASHED"); + break; + case DAOS_OT_OIT: + strcpy(type_str, "DAOS_OT_OIT"); + break; + case DAOS_OT_DKEY_UINT64: + strcpy(type_str, "DAOS_OT_DKEY_UINT64"); + break; + case DAOS_OT_AKEY_UINT64: + strcpy(type_str, "DAOS_OT_AKEY_UINT64"); + break; + case DAOS_OT_MULTI_UINT64: + strcpy(type_str, "DAOS_OT_MULTI_UINT64"); + break; + case DAOS_OT_DKEY_LEXICAL: + strcpy(type_str, "DAOS_OT_DKEY_LEXICAL"); + break; + case DAOS_OT_AKEY_LEXICAL: + strcpy(type_str, "DAOS_OT_AKEY_LEXICAL"); + break; + case DAOS_OT_MULTI_LEXICAL: + strcpy(type_str, "DAOS_OT_MULTI_LEXICAL"); + break; + case DAOS_OT_KV_HASHED: + strcpy(type_str, "DAOS_OT_KV_HASHED"); + break; + case DAOS_OT_KV_UINT64: + strcpy(type_str, "DAOS_OT_KV_UINT64"); + break; + case DAOS_OT_KV_LEXICAL: + strcpy(type_str, "DAOS_OT_KV_LEXICAL"); + break; + case DAOS_OT_ARRAY: + strcpy(type_str, "DAOS_OT_ARRAY"); + break; + case DAOS_OT_ARRAY_ATTR: 
+ strcpy(type_str, "DAOS_OT_ARRAY_ATTR"); + break; + case DAOS_OT_ARRAY_BYTE: + strcpy(type_str, "DAOS_OT_ARRAY_BYTE"); + break; + default: + strcpy(type_str, "UNKNOWN"); + break; + } +} + +void +dv_oid_to_obj(daos_obj_id_t oid, struct ddb_obj *obj) +{ + obj->ddbo_oid = oid; + obj->ddbo_nr_grps = (oid.hi & OID_FMT_META_MASK) >> OID_FMT_META_SHIFT; + + /* + * It would be nice to get the object class name, but currently that is client + * functionality and this tool is being installed as a server binary. If that changes, the + * following code might be used ... + * char obj_class_name[32]; + * int rc = obj_class_init(); + * daos_oclass_id_t oclass; + * oclass = daos_obj_id2class(obj->ddbo_oid); + * if (!SUCCESS(rc)) + * return rc; + * daos_oclass_id2name(oclass, obj_class_name); + * obj_class_fini(); + */ + + obj->ddbo_otype = daos_obj_id2type(oid); + get_object_type(obj->ddbo_otype, obj->ddbo_otype_str); +} + +static int +handle_obj(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) +{ + struct ddb_obj obj = {0}; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_obj_handler); + + dv_oid_to_obj(entry->ie_oid.id_pub, &obj); + itp_set_obj(&ctx->itp, entry->ie_oid, ctx->obj_seen); + itp_unset_dkey(&ctx->itp); + obj.ddbo_path = &ctx->itp; + + obj.ddbo_idx = ctx->obj_seen++; + + ctx->current_obj = entry->ie_oid; + + /* Restart dkey count for the object */ + ctx->dkey_seen = 0; + itp_unset_dkey(&ctx->itp); + + return ctx->handlers->ddb_obj_handler(&obj, ctx->handler_args); +} + +static int +handle_dkey(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) +{ + struct ddb_key dkey = {0}; + int rc; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_dkey_handler); + itp_unset_dkey(&ctx->itp); /* make sure dkey is freed from any previous handle */ + itp_set_dkey(&ctx->itp, &entry->ie_key, ctx->dkey_seen); + + dkey.ddbk_path = &ctx->itp; + dkey.ddbk_idx = ctx->dkey_seen++; + dkey.ddbk_key = entry->ie_key; + dkey.ddbk_child_type = entry->ie_child_type; + + 
ctx->current_dkey = entry->ie_key; + + /* Restart the akey count for the dkey */ + ctx->akey_seen = 0; + itp_unset_akey(&ctx->itp); + + rc = ctx->handlers->ddb_dkey_handler(&dkey, ctx->handler_args); + return rc; +} + +static int +handle_akey(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) +{ + struct ddb_key akey = {0}; + int rc; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_akey_handler); + itp_unset_akey(&ctx->itp); /* make sure akey is freed from any previous handle */ + itp_set_akey(&ctx->itp, &entry->ie_key, ctx->akey_seen); + itp_unset_recx(&ctx->itp); + + akey.ddbk_path = &ctx->itp; + akey.ddbk_idx = ctx->akey_seen++; + akey.ddbk_key = entry->ie_key; + akey.ddbk_child_type = entry->ie_child_type; + + ctx->current_akey = entry->ie_key; + + /* Restart the values seen for the akey */ + ctx->value_seen = 0; + + rc = ctx->handlers->ddb_akey_handler(&akey, ctx->handler_args); + return rc; +} + +static int +handle_sv(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) +{ + struct ddb_sv value = {0}; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_sv_handler); + value.ddbs_record_size = entry->ie_rsize; + value.ddbs_idx = ctx->value_seen++; + value.ddbs_path = &ctx->itp; + + return ctx->handlers->ddb_sv_handler(&value, ctx->handler_args); +} + +static int +handle_array(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) +{ + struct ddb_array value = {0}; + + D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_array_handler); + itp_set_recx(&ctx->itp, &entry->ie_orig_recx, ctx->value_seen); + value.ddba_path = &ctx->itp; + value.ddba_record_size = entry->ie_rsize; + value.ddba_recx = entry->ie_orig_recx; + value.ddba_idx = ctx->value_seen++; + + return ctx->handlers->ddb_array_handler(&value, ctx->handler_args); +} + +static int +handle_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + switch (type) { + case VOS_ITER_COUUID: + return handle_cont(cb_arg, entry, 
param); + case VOS_ITER_OBJ: + return handle_obj(cb_arg, entry); + case VOS_ITER_DKEY: + return handle_dkey(cb_arg, entry); + case VOS_ITER_AKEY: + return handle_akey(cb_arg, entry); + case VOS_ITER_SINGLE: + return handle_sv(cb_arg, entry); + case VOS_ITER_RECX: + return handle_array(cb_arg, entry); + case VOS_ITER_DTX: + D_ASSERT(1); /* shouldn't get here */ + break; + case VOS_ITER_NONE: + break; + } + + return 0; +} + +static int +iter_cont_recurse_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + vos_iter_param_t cont_param = {0}; + struct vos_iter_anchors anchors = {0}; + daos_handle_t coh; + int rc; + + D_ASSERT(type == VOS_ITER_COUUID); + + rc = handle_cont(cb_arg, entry, param); + if (!SUCCESS(rc)) + return rc; + + /* recursively iterate the objects in the container */ + rc = vos_cont_open(param->ip_hdl, entry->ie_couuid, &coh); + if (!SUCCESS(rc)) + return rc; + + cont_param.ip_hdl = coh; + cont_param.ip_epr.epr_hi = DAOS_EPOCH_MAX; + rc = ddb_vos_iterate(&cont_param, VOS_ITER_OBJ, true, &anchors, handle_iter_cb, cb_arg); + + if (rc != 0) + D_ERROR("vos_iterate error: "DF_RC"\n", DP_RC(rc)); + + rc = vos_cont_close(coh); + + return rc; +} + +static int +iter_cont_recurse(vos_iter_param_t *param, struct ddb_iter_ctx *ctx) +{ + struct vos_iter_anchors anchors = {0}; + + return ddb_vos_iterate(param, VOS_ITER_COUUID, false, &anchors, iter_cont_recurse_cb, ctx); +} + +int +dv_iterate(daos_handle_t poh, struct dv_tree_path *path, bool recursive, + struct vos_tree_handlers *handlers, void *handler_args, + struct dv_indexed_tree_path *itp) +{ + vos_iter_param_t param = {0}; + struct vos_iter_anchors anchors = {0}; + int rc; + daos_handle_t coh = DAOS_HDL_INVAL; + vos_iter_type_t type; + struct ddb_iter_ctx ctx = {0}; + + ctx.handlers = handlers; + ctx.handler_args = handler_args; + ctx.poh = poh; + itp_copy(&ctx.itp, itp); + + param.ip_epr.epr_hi = DAOS_EPOCH_MAX; + + if 
(uuid_is_null(path->vtp_cont)) { + param.ip_hdl = poh; + + if (recursive) { + /* + * currently vos_iterate doesn't handle recursive iteration starting with a + * container. This works around that limitation. + */ + rc = iter_cont_recurse(¶m, &ctx); + itp_free(&ctx.itp); + return rc; + } + rc = + ddb_vos_iterate(¶m, VOS_ITER_COUUID, false, &anchors, handle_iter_cb, &ctx); + itp_free(&ctx.itp); + return rc; + } + + rc = vos_cont_open(poh, path->vtp_cont, &coh); + if (!SUCCESS(rc)) { + itp_free(&ctx.itp); + return rc; + } + + param.ip_hdl = coh; + param.ip_oid = path->vtp_oid; + param.ip_dkey = path->vtp_dkey; + param.ip_akey = path->vtp_akey; + + if (!dv_has_obj(path)) + type = VOS_ITER_OBJ; + else if (!dv_has_dkey(path)) + type = VOS_ITER_DKEY; + else if (!dv_has_akey(path)) + type = VOS_ITER_AKEY; + else if (path->vtp_is_recx) + type = VOS_ITER_RECX; + else + type = VOS_ITER_SINGLE; + + rc = ddb_vos_iterate(¶m, type, recursive, &anchors, handle_iter_cb, &ctx); + itp_free(&ctx.itp); + if (!daos_handle_is_inval(coh)) + vos_cont_close(coh); + + return rc; +} + +int +dv_superblock(daos_handle_t poh, dv_dump_superblock_cb cb, void *cb_args) +{ + struct ddb_superblock sb = {0}; + struct vos_pool *pool; + struct vos_pool_df *pool_df; + + D_ASSERT(cb); + + pool = vos_hdl2pool(poh); + + if (pool == NULL) + return -DER_INVAL; + + pool_df = pool->vp_pool_df; + + if (pool_df == NULL || pool_df->pd_magic != POOL_DF_MAGIC) + return -DER_DF_INVAL; + + uuid_copy(sb.dsb_id, pool_df->pd_id); + sb.dsb_durable_format_version = pool_df->pd_version; + sb.dsb_cont_nr = pool_df->pd_cont_nr; + sb.dsb_nvme_sz = pool_df->pd_nvme_sz; + sb.dsb_scm_sz = pool_df->pd_scm_sz; + + sb.dsb_blk_sz = pool_df->pd_vea_df.vsd_blk_sz; + sb.dsb_hdr_blks = pool_df->pd_vea_df.vsd_hdr_blks; + sb.dsb_tot_blks = pool_df->pd_vea_df.vsd_tot_blks; + + + cb(cb_args, &sb); + + return 0; +} + +int +dv_dump_value(daos_handle_t poh, struct dv_tree_path *path, dv_dump_value_cb dump_cb, void *cb_arg) +{ + daos_iod_t iod = 
{0}; + d_sg_list_t sgl; + daos_handle_t coh; + size_t data_size; + int rc; + + d_sgl_init(&sgl, 1); + + rc = vos_cont_open(poh, path->vtp_cont, &coh); + if (!SUCCESS(rc)) + return rc; + + iod.iod_name = path->vtp_akey; + iod.iod_recxs = &path->vtp_recx; + iod.iod_nr = 1; + iod.iod_size = 0; + iod.iod_type = path->vtp_recx.rx_nr == 0 ? DAOS_IOD_SINGLE : DAOS_IOD_ARRAY; + + /* First, get record size */ + rc = vos_obj_fetch(coh, path->vtp_oid, DAOS_EPOCH_MAX, 0, &path->vtp_dkey, 1, &iod, NULL); + if (!SUCCESS(rc)) { + d_sgl_fini(&sgl, true); + vos_cont_close(coh); + + return rc; + } + + data_size = iod.iod_size; + + if (path->vtp_recx.rx_nr > 0) + data_size *= path->vtp_recx.rx_nr; + + D_ALLOC(sgl.sg_iovs[0].iov_buf, data_size); + if (sgl.sg_iovs[0].iov_buf == NULL) + return -DER_NOMEM; + sgl.sg_iovs[0].iov_buf_len = data_size; + + rc = vos_obj_fetch(coh, path->vtp_oid, DAOS_EPOCH_MAX, 0, &path->vtp_dkey, 1, &iod, &sgl); + if (!SUCCESS(rc)) { + D_ERROR("Unable to fetch object: "DF_RC"\n", DP_RC(rc)); + d_sgl_fini(&sgl, true); + vos_cont_close(coh); + + return rc; + } + + if (dump_cb) + rc = dump_cb(cb_arg, &sgl.sg_iovs[0]); + + d_sgl_fini(&sgl, true); + vos_cont_close(coh); + + return rc; +} + +static void +ilog_entry_status(enum ilog_status status, char *status_str, uint32_t status_str_len) +{ + switch (status) { + + case ILOG_INVALID: + snprintf(status_str, status_str_len, "INVALID"); + break; + case ILOG_COMMITTED: + snprintf(status_str, status_str_len, "COMMITTED"); + break; + case ILOG_UNCOMMITTED: + snprintf(status_str, status_str_len, "UNCOMMITTED"); + break; + case ILOG_REMOVED: + snprintf(status_str, status_str_len, "REMOVED"); + break; + } +} + + +static int +cb_foreach_entry(dv_dump_ilog_entry cb, void *cb_args, struct ilog_entries *entries) +{ + struct ilog_entry e; + struct ddb_ilog_entry ent = {0}; + int rc; + + ilog_foreach_entry(entries, &e) { + ent.die_idx = e.ie_idx; + ent.die_status = e.ie_status; + ilog_entry_status(e.ie_status, ent.die_status_str, 
ARRAY_SIZE(ent.die_status_str)); + ent.die_epoch = e.ie_id.id_epoch; + ent.die_tx_id = e.ie_id.id_tx_id; + ent.die_update_minor_eph = e.ie_id.id_update_minor_eph; + ent.die_punch_minor_eph = e.ie_id.id_punch_minor_eph; + + rc = cb(cb_args, &ent); + if (!SUCCESS(rc)) + return rc; + } + + return 0; +} + +int +dv_get_obj_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, dv_dump_ilog_entry cb, + void *cb_args) +{ + struct ilog_entries entries = {0}; + struct ilog_desc_cbs cbs = {0}; + struct vos_container *cont = NULL; + struct vos_obj_df *obj_df = NULL; + struct umem_instance *umm; + int rc; + + D_ASSERT(cb); + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(oid)) + return -DER_INVAL; + + ilog_fetch_init(&entries); + cont = vos_hdl2cont(coh); + + rc = vos_oi_find(cont, oid, &obj_df, NULL); + if (!SUCCESS(rc)) { + if (rc == -DER_NONEXIST) + return -DER_INVAL; + return rc; + } + + umm = vos_cont2umm(cont); + + vos_ilog_desc_cbs_init(&cbs, coh); + rc = ilog_fetch(umm, &obj_df->vo_ilog, &cbs, DAOS_INTENT_DEFAULT, false, &entries); + if (rc == -DER_NONEXIST) /* no entries exist ... 
not an error */ + return 0; + if (!SUCCESS(rc)) + return rc; + + rc = cb_foreach_entry(cb, cb_args, &entries); + return rc; +} + +static int +process_ilog_entries(daos_handle_t coh, struct umem_instance *umm, struct ilog_df *ilog, + enum ddb_ilog_op op) +{ + struct ilog_entries entries = {0}; + struct ilog_desc_cbs cbs = {0}; + daos_handle_t loh; + struct ilog_entry e; + int rc; + + vos_ilog_desc_cbs_init(&cbs, coh); + ilog_fetch_init(&entries); + + rc = ilog_fetch(umm, ilog, &cbs, DAOS_INTENT_DEFAULT, false, &entries); + if (!SUCCESS(rc)) + return rc; + + rc = ilog_open(umm, ilog, &cbs, false, &loh); + if (rc != 0) + return rc; + ilog_foreach_entry(&entries, &e) { + if (op == DDB_ILOG_OP_ABORT) + rc = ilog_abort(loh, &e.ie_id); + else if (op == DDB_ILOG_OP_PERSIST) + rc = ilog_persist(loh, &e.ie_id); + + if (!SUCCESS(rc)) { + ilog_close(loh); + return rc; + } + } + + ilog_close(loh); + + return 0; +} + +int +dv_process_obj_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, enum ddb_ilog_op op) +{ + struct vos_container *cont = NULL; + struct vos_obj_df *obj_df = NULL; + int rc; + + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(oid)) + return -DER_INVAL; + + cont = vos_hdl2cont(coh); + + rc = vos_oi_find(cont, oid, &obj_df, NULL); + if (!SUCCESS(rc)) { + if (rc == -DER_NONEXIST) + return -DER_INVAL; + return rc; + } + + return process_ilog_entries(coh, vos_cont2umm(cont), &obj_df->vo_ilog, op); +} + +static inline int +ddb_key_iter_fetch_helper(struct vos_obj_iter *oiter, struct vos_rec_bundle *rbund) +{ + d_iov_t kiov; + d_iov_t riov; + struct dcs_csum_info csum = {0}; + d_iov_t key = {0}; + + tree_rec_bundle2iov(rbund, &riov); + + rbund->rb_iov = &key; + rbund->rb_csum = &csum; + + d_iov_set(rbund->rb_iov, NULL, 0); /* no copy */ + ci_set_null(rbund->rb_csum); + + return dbtree_iter_fetch(oiter->it_hdl, &kiov, &riov, NULL); +} + +struct ilog_cb_args { + daos_key_t *key; + dv_dump_ilog_entry cb; + void *cb_args; + enum ddb_ilog_op op; +}; + +static int 
+key_ilog_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + struct vos_iterator *iter = vos_hdl2iter(ih); + struct vos_obj_iter *oiter = vos_iter2oiter(iter); + struct umem_instance *umm; + struct vos_rec_bundle rbund; + struct ilog_cb_args *args = cb_arg; + struct vos_krec_df *krec; + int rc; + struct ilog_desc_cbs cbs = {0}; + daos_handle_t coh = param->ip_hdl; + struct ilog_entries entries = {0}; + + D_ASSERT(type == VOS_ITER_DKEY || type == VOS_ITER_AKEY); + if (!daos_key_match(&entry->ie_key, args->key)) + return 0; + + ilog_fetch_init(&entries); + + rc = ddb_key_iter_fetch_helper(oiter, &rbund); + if (!SUCCESS(rc)) + return rc; + + krec = rbund.rb_krec; + umm = vos_obj2umm(oiter->it_obj); + + vos_ilog_desc_cbs_init(&cbs, coh); + + rc = ilog_fetch(umm, &krec->kr_ilog, &cbs, DAOS_INTENT_DEFAULT, false, &entries); + if (!SUCCESS(rc)) + return rc; + + rc = cb_foreach_entry(args->cb, args->cb_args, &entries); + + return rc; +} + +int +dv_get_key_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, daos_key_t *dkey, daos_key_t *akey, + dv_dump_ilog_entry cb, void *cb_args) +{ + vos_iter_param_t param = {0}; + struct vos_iter_anchors anchors = {0}; + struct ilog_cb_args args = {0}; + vos_iter_type_t type = VOS_ITER_DKEY; + + D_ASSERT(cb); + + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(oid) || + dkey == NULL || dkey->iov_len == 0) + return -DER_INVAL; + + param.ip_hdl = coh; + param.ip_oid = oid; + param.ip_epr.epr_hi = DAOS_EPOCH_MAX; + param.ip_dkey = *dkey; + args.key = dkey; + args.cb = cb; + args.cb_args = cb_args; + + if (akey != NULL) { + param.ip_akey = *akey; + args.key = akey; + type = VOS_ITER_AKEY; + } + + return ddb_vos_iterate(¶m, type, false, &anchors, key_ilog_cb, &args); +} + +static int +process_key_ilog_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, + vos_iter_param_t *param, void *cb_arg, unsigned int *acts) +{ + struct vos_iterator 
*iter = vos_hdl2iter(ih); + struct vos_obj_iter *oiter = vos_iter2oiter(iter); + struct ilog_cb_args *args = cb_arg; + struct vos_rec_bundle rbund; + daos_handle_t coh = param->ip_hdl; + int rc; + + D_ASSERT(type == VOS_ITER_DKEY || type == VOS_ITER_AKEY); + if (!daos_key_match(&entry->ie_key, args->key)) + return 0; + + rc = ddb_key_iter_fetch_helper(oiter, &rbund); + if (!SUCCESS(rc)) + return rc; + + return process_ilog_entries(coh, vos_obj2umm(oiter->it_obj), &rbund.rb_krec->kr_ilog, + args->op); +} + +int +dv_process_key_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, daos_key_t *dkey, + daos_key_t *akey, enum ddb_ilog_op op) +{ + vos_iter_param_t param = {0}; + struct vos_iter_anchors anchors = {0}; + struct ilog_cb_args args = {0}; + vos_iter_type_t type = VOS_ITER_DKEY; + + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(oid) || + dkey == NULL || dkey->iov_len == 0 || (op != DDB_ILOG_OP_ABORT && + op != DDB_ILOG_OP_PERSIST)) + return -DER_INVAL; + + if (daos_handle_is_inval(coh) || daos_unit_oid_is_null(oid) || + dkey == NULL || dkey->iov_len == 0) + return -DER_INVAL; + + param.ip_hdl = coh; + param.ip_oid = oid; + param.ip_epr.epr_hi = DAOS_EPOCH_MAX; + param.ip_dkey = *dkey; + args.key = dkey; + args.op = op; + if (akey != NULL) { + args.key = akey; + type = VOS_ITER_AKEY; + param.ip_akey = *akey; + } + + return ddb_vos_iterate(¶m, type, false, &anchors, process_key_ilog_cb, &args); + + return 0; +} + +struct committed_dtx_cb_arg { + dv_dtx_cmt_handler handler; + void *handler_arg; +}; + +struct active_dtx_cb_arg { + dv_dtx_act_handler handler; + void *handler_arg; +}; + +static int +committed_dtx_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *cb_arg) +{ + struct committed_dtx_cb_arg *arg = cb_arg; + struct dv_dtx_committed_entry entry; + struct vos_dtx_cmt_ent *ent = val->iov_buf; + int rc; + + entry.ddtx_id = ent->dce_base.dce_xid; + entry.ddtx_cmt_time = ent->dce_base.dce_cmt_time; + entry.ddtx_epoch = ent->dce_base.dce_epoch; + + 
rc = arg->handler(&entry, arg->handler_arg); + + return rc; +} + +static int +active_dtx_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *cb_arg) +{ + struct dv_dtx_active_entry entry = {0}; + struct active_dtx_cb_arg *arg = cb_arg; + struct vos_dtx_act_ent *ent = val->iov_buf; + int rc; + + entry.ddtx_id = ent->dae_base.dae_xid; + entry.ddtx_epoch = ent->dae_base.dae_epoch; + entry.ddtx_grp_cnt = ent->dae_base.dae_grp_cnt; + entry.ddtx_ver = ent->dae_base.dae_ver; + entry.ddtx_rec_cnt = ent->dae_base.dae_rec_cnt; + entry.ddtx_mbs_flags = ent->dae_base.dae_mbs_flags; + entry.ddtx_flags = ent->dae_base.dae_flags; + entry.ddtx_oid = ent->dae_base.dae_oid; + + rc = arg->handler(&entry, arg->handler_arg); + + return rc; +} + +int +dv_dtx_get_cmt_table(daos_handle_t coh, dv_dtx_cmt_handler handler_cb, void *handler_arg) +{ + struct vos_container *cont; + int rc; + struct committed_dtx_cb_arg cb_arg = {0}; + + if (daos_handle_is_inval(coh)) + return -DER_INVAL; + + cb_arg.handler = handler_cb; + cb_arg.handler_arg = handler_arg; + + cont = vos_hdl2cont(coh); + + /* + * Must reindex before can iterate the committed table. 
Each reindex only reindex entries + * within one block, so must loop until all are done (rc == 1) + */ + do { + rc = vos_dtx_cmt_reindex(coh); + } while (rc >= 0 && rc != 1); + if (rc < 0) + return rc; + + rc = dbtree_iterate(cont->vc_dtx_committed_hdl, DAOS_INTENT_DEFAULT, false, + committed_dtx_cb, &cb_arg); + return rc; +} + +int +dv_dtx_get_act_table(daos_handle_t coh, dv_dtx_act_handler handler_cb, void *handler_arg) +{ + struct vos_container *cont; + int rc; + struct active_dtx_cb_arg cb_arg = {0}; + + if (daos_handle_is_inval(coh)) + return -DER_INVAL; + + cb_arg.handler = handler_cb; + cb_arg.handler_arg = handler_arg; + + cont = vos_hdl2cont(coh); + + rc = dbtree_iterate(cont->vc_dtx_active_hdl, DAOS_INTENT_DEFAULT, false, + active_dtx_cb, &cb_arg); + + return rc; +} + +int +dv_dtx_commit_active_entry(daos_handle_t coh, struct dtx_id *dti) +{ + return vos_dtx_commit(coh, dti, 1, NULL); +} + +int +dv_dtx_abort_active_entry(daos_handle_t coh, struct dtx_id *dti) +{ + return vos_dtx_abort(coh, dti, DAOS_EPOCH_MAX); +} + +int +dv_delete(daos_handle_t poh, struct dv_tree_path *vtp) +{ + daos_handle_t coh; + int rc; + + /* Don't allow deleting all contents ... 
must specify at least a container */ + if (dvp_is_empty(vtp)) + return -DER_INVAL; + + if (!SUCCESS(ddb_vtp_verify(poh, vtp))) + return -DER_NONEXIST; + + if (!dv_has_obj(vtp)) + return vos_cont_destroy(poh, vtp->vtp_cont); + + rc = dv_cont_open(poh, vtp->vtp_cont, &coh); + if (!SUCCESS(rc)) + return rc; + + if (dv_has_akey(vtp)) + rc = vos_obj_del_key(coh, vtp->vtp_oid, &vtp->vtp_dkey, &vtp->vtp_akey); + else if (dv_has_dkey(vtp)) + rc = vos_obj_del_key(coh, vtp->vtp_oid, &vtp->vtp_dkey, NULL); + else /* delete object */ + rc = vos_obj_delete(coh, vtp->vtp_oid); + + dv_cont_close(&coh); + + return rc; +} + +int +dv_update(daos_handle_t poh, struct dv_tree_path *vtp, d_iov_t *iov) +{ + daos_iod_t iod = {0}; + d_sg_list_t sgl = {0}; + uint64_t flags = 0; + daos_handle_t coh; + daos_epoch_t epoch = 0; + uint32_t pool_ver = 0; + int rc; + + if (!dvp_is_complete(vtp) || iov->iov_len == 0) + return -DER_INVAL; + + rc = dv_cont_open(poh, vtp->vtp_cont, &coh); + if (!SUCCESS(rc)) + return rc; + + d_sgl_init(&sgl, 1); + sgl.sg_nr_out = 1; + sgl.sg_iovs[0] = *iov; + + iod.iod_name = vtp->vtp_akey; + iod.iod_nr = 1; + if (vtp->vtp_recx.rx_nr == 0) { + iod.iod_type = DAOS_IOD_SINGLE; + iod.iod_size = iov->iov_len; + } else { + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_recxs = &vtp->vtp_recx; + iod.iod_size = 1; + } + + epoch = d_hlc_get(); + rc = vos_obj_update(coh, vtp->vtp_oid, epoch, pool_ver, flags, + &vtp->vtp_dkey, 1, &iod, NULL, &sgl); + if (rc == -DER_NO_PERM) + D_ERROR("Unable to update. Trying to update with the wrong value type? " + "(Array vs SV)\n"); + if (rc == -DER_REC2BIG) + D_ERROR("Unable to update. 
Data value might not be large enough to fill the " + "supplied recx\n"); + d_sgl_fini(&sgl, false); + dv_cont_close(&coh); + + return rc; +} + +static int +find_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type, vos_iter_param_t *param, + void *cb_arg, unsigned int *acts) +{ + struct dv_tree_path *path = cb_arg; + + switch (type) { + + case VOS_ITER_NONE: + break; + case VOS_ITER_COUUID: + break; + case VOS_ITER_OBJ: + if (daos_oid_cmp(path->vtp_oid.id_pub, entry->ie_oid.id_pub) == 0) + return 1; + break; + case VOS_ITER_DKEY: + if (daos_key_match(&path->vtp_dkey, &entry->ie_key)) + return 1; + break; + case VOS_ITER_AKEY: + if (daos_key_match(&path->vtp_akey, &entry->ie_key)) + return 1; + break; + case VOS_ITER_SINGLE: + break; + case VOS_ITER_RECX: + if (daos_recx_match(path->vtp_recx, entry->ie_orig_recx)) + return 1; + break; + case VOS_ITER_DTX: + break; + } + return 0; +} + +/* Note: + * This can be improved by verifying the path in a single vos_iterate ... instead of 1 for + * path part. 
+ */
+static bool
+part_is_valid(daos_handle_t coh, struct dv_tree_path *path, vos_iter_type_t type)
+{
+	struct vos_iter_anchors	anchors = {0};
+	vos_iter_param_t	param = {0};
+	int			found;
+
+	param.ip_epr.epr_hi = DAOS_EPOCH_MAX;
+	param.ip_hdl = coh;
+	param.ip_oid = path->vtp_oid;
+	param.ip_dkey = path->vtp_dkey;
+	/* the akey is only part of the starting point when looking for a recx */
+	if (type == VOS_ITER_RECX)
+		param.ip_akey = path->vtp_akey;
+
+	/* a result of exactly 1 is treated as "found"; find_cb returns 1 on a match */
+	found = vos_iterate(&param, type, false, &anchors, find_cb, NULL, path, NULL);
+
+	return found == 1;
+}
+
+/*
+ * Check that every part set in 'vtp' (object, dkey, akey, recx) exists in the container.
+ * A path without a container is accepted. Returns -DER_NONEXIST for the first part that
+ * cannot be found, or the error from opening the container.
+ */
+int
+ddb_vtp_verify(daos_handle_t poh, struct dv_tree_path *vtp)
+{
+	daos_handle_t	coh;
+	int		rc;
+
+	/* empty path is fine */
+	if (uuid_is_null(vtp->vtp_cont))
+		return 0;
+
+	rc = dv_cont_open(poh, vtp->vtp_cont, &coh);
+	if (!SUCCESS(rc))
+		return rc;
+
+	/* parts are checked in order; short-circuiting stops at the first missing one */
+	if ((!daos_oid_is_null(vtp->vtp_oid.id_pub) && !part_is_valid(coh, vtp, VOS_ITER_OBJ)) ||
+	    (vtp->vtp_dkey.iov_len > 0 && !part_is_valid(coh, vtp, VOS_ITER_DKEY)) ||
+	    (vtp->vtp_akey.iov_len > 0 && !part_is_valid(coh, vtp, VOS_ITER_AKEY)) ||
+	    (vtp->vtp_recx.rx_nr > 0 && !part_is_valid(coh, vtp, VOS_ITER_RECX)))
+		rc = -DER_NONEXIST;
+
+	dv_cont_close(&coh);
+
+	return rc;
+}
+
+/*
+ * Delete dtx committed entries. Returns number of entries deleted.
+ * On error will return value < 0
+ */
+static int
+dtx_cmt_entry_delete(daos_handle_t coh)
+{
+	struct vos_container		*cont;
+	struct vos_cont_df		*cont_df;
+	struct umem_instance		*umm;
+	struct vos_dtx_blob_df		*dbd;
+	struct vos_dtx_blob_df		*next;
+	uint64_t			 epoch;
+	umem_off_t			 dbd_off;
+	uint32_t			 delete_count = 0;
+	int				 rc;
+	int				 i;
+
+	cont = vos_hdl2cont(coh);
+	D_ASSERT(cont != NULL);
+
+	cont_df = cont->vc_cont_df;
+	dbd_off = cont_df->cd_dtx_committed_head;
+	umm = vos_cont2umm(cont);
+	epoch = cont_df->cd_newest_aggregated;
+
+	dbd = umem_off2ptr(umm, dbd_off);
+	if (dbd == NULL || dbd->dbd_count == 0)
+		return 0;
+
+	rc = umem_tx_begin(umm, NULL);
+	if (rc != 0) {
+		D_ERROR("Failed to TX begin "UMOFF_PF": "DF_RC"\n", UMOFF_P(dbd_off), DP_RC(rc));
+		return rc;
+	}
+
+	for (i = 0; i < dbd->dbd_count; i++) {
+		struct vos_dtx_cmt_ent_df	*dce_df;
+		d_iov_t				 kiov;
+
+		dce_df = &dbd->dbd_committed_data[i];
+		if (epoch < dce_df->dce_epoch)
+			epoch = dce_df->dce_epoch;
+		d_iov_set(&kiov, &dce_df->dce_xid, sizeof(dce_df->dce_xid));
+		rc = dbtree_delete(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ,
+				   &kiov, NULL);
+		if (rc != 0 && rc != -DER_NONEXIST) {
+			D_ERROR("Failed to remove entry "UMOFF_PF": "DF_RC"\n",
+				UMOFF_P(dbd_off), DP_RC(rc));
+			goto out;
+		}
+	}
+	delete_count = i;
+
+	if (epoch != cont_df->cd_newest_aggregated) {
+		rc = umem_tx_add_ptr(umm, &cont_df->cd_newest_aggregated,
+				     sizeof(cont_df->cd_newest_aggregated));
+		if (rc != 0) {
+			D_ERROR("Failed to refresh epoch "UMOFF_PF": "DF_RC"\n",
+				UMOFF_P(dbd_off), DP_RC(rc));
+			goto out;
+		}
+
+		cont_df->cd_newest_aggregated = epoch;
+	}
+
+	next = umem_off2ptr(umm, dbd->dbd_next);
+	if (next == NULL) {
+		/* The last blob for committed DTX blob. */
+		D_ASSERT(cont_df->cd_dtx_committed_tail == cont_df->cd_dtx_committed_head);
+
+		rc = umem_tx_add_ptr(umm, &cont_df->cd_dtx_committed_tail,
+				     sizeof(cont_df->cd_dtx_committed_tail));
+		if (rc != 0) {
+			D_ERROR("Failed to update tail "UMOFF_PF": "DF_RC"\n",
+				UMOFF_P(dbd_off), DP_RC(rc));
+			goto out;
+		}
+
+		cont_df->cd_dtx_committed_tail = UMOFF_NULL;
+	} else {
+		rc = umem_tx_add_ptr(umm, &next->dbd_prev,
+				     sizeof(next->dbd_prev));
+		if (rc != 0) {
+			D_ERROR("Failed to update prev "UMOFF_PF": "DF_RC"\n",
+				UMOFF_P(dbd_off), DP_RC(rc));
+			goto out;
+		}
+
+		next->dbd_prev = UMOFF_NULL;
+	}
+
+	rc = umem_tx_add_ptr(umm, &cont_df->cd_dtx_committed_head,
+			     sizeof(cont_df->cd_dtx_committed_head));
+	if (rc != 0) {
+		D_ERROR("Failed to update head "UMOFF_PF": "DF_RC"\n", UMOFF_P(dbd_off), DP_RC(rc));
+		goto out;
+	}
+
+	cont_df->cd_dtx_committed_head = dbd->dbd_next;
+	rc = umem_free(umm, dbd_off);
+
+out:
+	rc = umem_tx_end(umm, rc);
+	if (rc != 0) {
+		D_ERROR("Failed to delete DTX committed entries "UMOFF_PF": "
+			DF_RC"\n", UMOFF_P(dbd_off), DP_RC(rc));
+		return rc;
+	}
+
+	return delete_count;
+}
+
+/*
+ * Delete committed DTX entries one head blob at a time until nothing is left.
+ * Returns the total number of entries deleted, or a negative error.
+ */
+int
+dv_dtx_clear_cmt_table(daos_handle_t coh)
+{
+	uint32_t	delete_count = 0;
+	int		rc;
+
+	do {
+		rc = dtx_cmt_entry_delete(coh);
+		if (rc > 0)
+			delete_count += rc;
+	} while (rc > 0);
+
+	if (rc < 0)
+		return rc;
+	return delete_count;
+}
+
+/* State shared between dv_sync_smd() and sync_cb() */
+struct dv_sync_cb_args {
+	dv_smd_sync_complete	 sync_complete_cb;
+	void			*sync_cb_args;
+	int			 sync_rc;
+};
+
+/*
+ * Per blob-header callback of dv_sync_smd(): maps the target to the device, re-creates the
+ * pool target entry and reports the result through the optional completion callback.
+ * Failures that stop the sync of this blob are recorded in args->sync_rc.
+ */
+static void
+sync_cb(struct ddbs_sync_info *info, void *cb_args)
+{
+	uint8_t			*pool_id;
+	struct smd_pool_info	*pool_info = NULL;
+	daos_size_t		 blob_size;
+	struct dv_sync_cb_args	*args = cb_args;
+	enum smd_dev_type	 st = SMD_DEV_TYPE_DATA; /* FIXME: support other types? */
+	int			 rc;
+
+	D_ASSERT(args != NULL);
+
+	if (info->dsi_hdr == NULL) {
+		D_ERROR("Got called without the header. Unable to sync.\n");
+		args->sync_rc = -DER_UNKNOWN;
+		return;
+	}
+	/* only safe to read from the header once it is known to be present */
+	pool_id = info->dsi_hdr->bbh_pool;
+
+	rc = smd_dev_add_tgt(info->dsi_dev_id, info->dsi_hdr->bbh_vos_id, st);
+	smd_dev_set_state(info->dsi_dev_id, SMD_DEV_NORMAL);
+	if (rc == -DER_EXIST)
+		D_INFO("tgt_id(%d) already mapped to dev_id("DF_UUID")\n",
+		       info->dsi_hdr->bbh_vos_id, DP_UUID(info->dsi_dev_id));
+	else if (rc != 0)
+		D_ERROR("Error mapping tgt_id(%d) to dev_id("DF_UUID")\n",
+			info->dsi_hdr->bbh_vos_id, DP_UUID(info->dsi_dev_id));
+
+	rc = smd_pool_get_info(pool_id, &pool_info);
+	if (!SUCCESS(rc)) {
+		D_ERROR("Failed to get smd pool info. Going to continue rebuilding smd_pool "
+			"table with spdk cluster size and cluster count: "DF_RC". \n", DP_RC(rc));
+		/*
+		 * This could be larger than how the pool was originally configured, but it will
+		 * not be smaller
+		 */
+		blob_size = info->dsi_cluster_nr * info->dsi_cluster_size;
+	} else {
+		blob_size = pool_info->spi_blob_sz[st];
+		smd_pool_free_info(pool_info);
+	}
+
+	/* Try to delete the target first */
+	rc = smd_pool_del_tgt(pool_id, info->dsi_hdr->bbh_vos_id, st);
+	if (!SUCCESS(rc)) {
+		/* Ignore error for now ... might not exist*/
+		D_WARN("delete target failed: "DF_RC"\n", DP_RC(rc));
+		rc = 0;
+	}
+
+	rc = smd_pool_add_tgt(pool_id, info->dsi_hdr->bbh_vos_id,
+			      info->dsi_hdr->bbh_blob_id, st, blob_size);
+	if (!SUCCESS(rc)) {
+		D_ERROR("add target failed: "DF_RC"\n", DP_RC(rc));
+		args->sync_rc = rc;
+		return;
+	}
+
+	if (args->sync_complete_cb) {
+		/* NOTE(review): the callback's result is not propagated - confirm that is intended */
+		rc = args->sync_complete_cb(args->sync_cb_args, pool_id,
+					    info->dsi_hdr->bbh_vos_id,
+					    info->dsi_hdr->bbh_blob_id,
+					    blob_size, info->dsi_dev_id);
+	}
+}
+
+/*
+ * Rebuild the SMD tables from the blob headers found through 'nvme_conf'. 'complete_cb'
+ * (optional) is called for every blob that was synced. Returns the iteration error or,
+ * if the iteration succeeded, the error recorded by sync_cb().
+ */
+int
+dv_sync_smd(const char *nvme_conf, const char *db_path, dv_smd_sync_complete complete_cb,
+	    void *cb_args)
+{
+	struct dv_sync_cb_args	sync_cb_args = {0};
+	int			rc;
+
+	/* don't initialize NVMe within VOS. Will happen in ddb_spdk module */
+	rc = vos_self_init_ext(db_path, true, 0, false);
+
+	if (!SUCCESS(rc)) {
+		D_ERROR("VOS failed to initialize: "DF_RC"\n", DP_RC(rc));
+		return rc;
+	}
+
+	rc = smd_init(vos_db_get());
+	if (!SUCCESS(rc)) {
+		D_ERROR("SMD failed to initialize: "DF_RC"\n", DP_RC(rc));
+		/* same teardown as the normal exit, minus the SMD that never came up */
+		vos_db_fini();
+		return rc;
+	}
+
+	sync_cb_args.sync_complete_cb = complete_cb;
+	sync_cb_args.sync_cb_args = cb_args;
+	rc = ddbs_for_each_bio_blob_hdr(nvme_conf, sync_cb, &sync_cb_args);
+
+	if (rc == 0 && sync_cb_args.sync_rc != 0)
+		rc = sync_cb_args.sync_rc;
+
+	smd_fini();
+	vos_db_fini();
+
+	return rc;
+}
+
+/* User callback and argument forwarded by vea_free_extent_cb */
+struct vea_cb_args {
+	dv_vea_extent_handler	 vca_cb;
+	void			*vca_cb_args;
+};
+
+/* vea_enumerate_free() callback: forward the extent to the user callback, if any */
+static int
+vea_free_extent_cb(void *cb_arg, struct vea_free_extent *vfe)
+{
+	struct vea_cb_args *args = cb_arg;
+
+	if (args->vca_cb)
+		return args->vca_cb(args->vca_cb_args, vfe);
+
+	return 0;
+}
+
+/*
+ * Pass every free VEA extent of the pool to 'cb'. Returns -DER_INVAL if the handle has no
+ * pool and -DER_NONEXIST if the pool has no VEA space info.
+ */
+int
+dv_enumerate_vea(daos_handle_t poh, dv_vea_extent_handler cb, void *cb_arg)
+{
+	struct vea_cb_args	 args = {.vca_cb = cb, .vca_cb_args = cb_arg};
+	struct vos_pool		*pool;
+	struct vea_space_info	*vsi;
+	int			 rc;
+
+	pool = vos_hdl2pool(poh);
+	if (pool == NULL)
+		return -DER_INVAL;
+
+	vsi = pool->vp_vea_info;
+	if (vsi == NULL)
+		return -DER_NONEXIST;
+
+	rc = vea_enumerate_free(vsi, vea_free_extent_cb, &args);
+	if (!SUCCESS(rc))
+		D_ERROR("vea_enumerate_free failed: "DF_RC"\n", DP_RC(rc));
+	return rc;
+}
+
+/*
+ * Mark 'blk_cnt' blocks starting at 'offset' as free in the pool's VEA. Offset 0 is
+ * rejected with -DER_INVAL; a handle without a pool also yields -DER_INVAL and a pool
+ * without VEA space info yields -DER_NONEXIST.
+ */
+int
+dv_vea_free_region(daos_handle_t poh, uint32_t offset, uint32_t blk_cnt)
+{
+	struct vos_pool		*pool;
+	struct vea_space_info	*vsi;
+	int			 rc;
+
+	if (offset == 0)
+		return -DER_INVAL;
+
+	pool = vos_hdl2pool(poh);
+	if (pool == NULL)
+		return -DER_INVAL;
+
+	vsi = pool->vp_vea_info;
+	if (vsi == NULL)
+		return -DER_NONEXIST;
+
+	rc = vea_free(vsi, offset, blk_cnt);
+	if (!SUCCESS(rc))
+		D_ERROR("vea_free error: "DF_RC"\n", DP_RC(rc));
+
+	return rc;
+}
diff --git a/src/ddb/ddb_vos.h b/src/ddb/ddb_vos.h
new file mode 100644
index 00000000000..d1bba07c541
--- /dev/null
+++ b/src/ddb/ddb_vos.h
@@ -0,0 +1,198 @@
+/**
+ * (C) Copyright 2022 Intel Corporation.
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef DAOS_DDB_VOS_H +#define DAOS_DDB_VOS_H + +#include +#include +#include "ddb_tree_path.h" + +struct ddb_cont { + uuid_t ddbc_cont_uuid; + uint32_t ddbc_idx; + struct dv_indexed_tree_path *ddbc_path; +}; + +struct ddb_obj { + daos_obj_id_t ddbo_oid; + uint32_t ddbo_idx; + enum daos_otype_t ddbo_otype; + char ddbo_otype_str[32]; + uint32_t ddbo_nr_grps; + struct dv_indexed_tree_path *ddbo_path; +}; + +struct ddb_key { + daos_key_t ddbk_key; + uint32_t ddbk_idx; + vos_iter_type_t ddbk_child_type; + struct dv_indexed_tree_path *ddbk_path; +}; + +struct ddb_sv { + uint64_t ddbs_record_size; + uint32_t ddbs_idx; + struct dv_indexed_tree_path *ddbs_path; +}; + +struct ddb_array { + uint64_t ddba_record_size; + daos_recx_t ddba_recx; + uint32_t ddba_idx; + struct dv_indexed_tree_path *ddba_path; + +}; + +/* Open and close a pool for a ddb_ctx */ +int dv_pool_open(char *path, daos_handle_t *poh); +int dv_pool_close(daos_handle_t poh); + +/* Open and close a cont for a ddb_ctx */ +int dv_cont_open(daos_handle_t poh, uuid_t uuid, daos_handle_t *coh); +int dv_cont_close(daos_handle_t *coh); + +/* + * Table of functions for handling parts of a vos tree. Is used with vos_iterate and well defined + * structures for each tree branch. + */ +struct vos_tree_handlers { + int (*ddb_cont_handler)(struct ddb_cont *cont, void *args); + int (*ddb_obj_handler)(struct ddb_obj *obj, void *args); + int (*ddb_dkey_handler)(struct ddb_key *key, void *args); + int (*ddb_akey_handler)(struct ddb_key *key, void *args); + int (*ddb_sv_handler)(struct ddb_sv *key, void *args); + int (*ddb_array_handler)(struct ddb_array *key, void *args); +}; + +/* + * Traverse over a vos tree. The starting point is indicated by the path passed. 
+ */ +/** + * + * @param poh Open pool handle + * @param path Starting point for traversing the tree + * @param recursive Whether to traverse the tree from the starting path recursively, or + * just the immediate children + * @param handlers Function table providing the callbacks for handling the vos tree parts + * @param handler_args arguments to the handlers + * @return 0 if success, else error + */ +int dv_iterate(daos_handle_t poh, struct dv_tree_path *path, bool recursive, + struct vos_tree_handlers *handlers, void *handler_args, + struct dv_indexed_tree_path *itp); + +/* need a special function to get a container idx */ +int dv_get_cont_idx(daos_handle_t poh, uuid_t uuid); +/* The following functions lookup a vos path part given a starting point and the index desired */ +int dv_get_cont_uuid(daos_handle_t poh, uint32_t idx, uuid_t uuid); +int dv_get_object_oid(daos_handle_t coh, uint32_t idx, daos_unit_oid_t *uoid); +int dv_get_dkey(daos_handle_t coh, daos_unit_oid_t uoid, uint32_t idx, daos_key_t *dkey); +int dv_get_akey(daos_handle_t coh, daos_unit_oid_t uoid, daos_key_t *dkey, uint32_t idx, + daos_key_t *akey); +int dv_get_recx(daos_handle_t coh, daos_unit_oid_t uoid, daos_key_t *dkey, daos_key_t *akey, + uint32_t idx, daos_recx_t *recx); + +/** + * Verify and update the tree path within the builder. For any indexes set in the builder, will + * try to find and update appropriate path parts. If the path part is already set, will + * verify that it exists. 
+ * @param ctx application context + * @param pb The path builder structure + * @return 0 if successful, else error + */ +int dv_path_verify(daos_handle_t poh, struct dv_indexed_tree_path *vtp); + +struct ddb_superblock { + uuid_t dsb_id; + uint64_t dsb_cont_nr; + uint64_t dsb_nvme_sz; + uint64_t dsb_scm_sz; + uint64_t dsb_tot_blks; /* vea: Block device capacity */ + uint32_t dsb_durable_format_version; + uint32_t dsb_blk_sz; /* vea: Block size, 4k bytes by default */ + uint32_t dsb_hdr_blks; /* vea: Reserved blocks for the block device header */ +}; + +typedef int (*dv_dump_superblock_cb)(void *cb_arg, struct ddb_superblock *sb); + +int dv_superblock(daos_handle_t poh, dv_dump_superblock_cb cb, void *cb_args); + +typedef int (*dv_dump_value_cb)(void *cb_arg, d_iov_t *value); +int dv_dump_value(daos_handle_t poh, struct dv_tree_path *path, dv_dump_value_cb dump_cb, + void *cb_arg); + +struct ddb_ilog_entry { + uint32_t die_idx; + int32_t die_status; + char die_status_str[32]; + daos_epoch_t die_epoch; + uint32_t die_tx_id; + uint16_t die_update_minor_eph; + uint16_t die_punch_minor_eph; +}; + +enum ddb_ilog_op { + DDB_ILOG_OP_UNKNOWN = 0, + DDB_ILOG_OP_ABORT = 1, + DDB_ILOG_OP_PERSIST = 2, +}; + +typedef int (*dv_dump_ilog_entry)(void *cb_arg, struct ddb_ilog_entry *entry); +int dv_get_obj_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, dv_dump_ilog_entry cb, + void *cb_args); +int dv_process_obj_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, enum ddb_ilog_op op); + +int +dv_get_key_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, daos_key_t *dkey, daos_key_t *akey, + dv_dump_ilog_entry cb, void *cb_args); + +int dv_process_key_ilog_entries(daos_handle_t coh, daos_unit_oid_t oid, daos_key_t *dkey, + daos_key_t *akey, enum ddb_ilog_op op); + +struct dv_dtx_committed_entry { + struct dtx_id ddtx_id; + daos_epoch_t ddtx_cmt_time; + daos_epoch_t ddtx_epoch; +}; + +struct dv_dtx_active_entry { + struct dtx_id ddtx_id; + daos_epoch_t ddtx_handle_time; + 
daos_epoch_t ddtx_epoch; + uint32_t ddtx_grp_cnt; + uint32_t ddtx_ver; + uint32_t ddtx_rec_cnt; + uint16_t ddtx_mbs_flags; + uint16_t ddtx_flags; + daos_unit_oid_t ddtx_oid; +}; + +typedef int (*dv_dtx_cmt_handler)(struct dv_dtx_committed_entry *entry, void *cb_arg); +int dv_dtx_get_cmt_table(daos_handle_t coh, dv_dtx_cmt_handler handler_cb, void *handler_arg); +typedef int (*dv_dtx_act_handler)(struct dv_dtx_active_entry *entry, void *cb_arg); +int dv_dtx_get_act_table(daos_handle_t coh, dv_dtx_act_handler handler_cb, void *handler_arg); +int dv_dtx_clear_cmt_table(daos_handle_t coh); +int dv_dtx_commit_active_entry(daos_handle_t coh, struct dtx_id *dti); +int dv_dtx_abort_active_entry(daos_handle_t coh, struct dtx_id *dti); + +/* Sync the smd table with information saved in blobs */ +typedef int (*dv_smd_sync_complete)(void *cb_args, uuid_t pool_id, uint32_t vos_id, + uint64_t blob_id, daos_size_t blob_size, uuid_t dev_id); +int dv_sync_smd(const char *nvme_conf, const char *db_path, dv_smd_sync_complete complete_cb, + void *cb_args); + +typedef int (*dv_vea_extent_handler)(void *cb_arg, struct vea_free_extent *free_extent); +int dv_enumerate_vea(daos_handle_t poh, dv_vea_extent_handler cb, void *cb_arg); +int dv_vea_free_region(daos_handle_t poh, uint32_t offset, uint32_t blk_cnt); +int dv_delete(daos_handle_t poh, struct dv_tree_path *vtp); +int dv_update(daos_handle_t poh, struct dv_tree_path *vtp, d_iov_t *iov); + +void dv_oid_to_obj(daos_obj_id_t oid, struct ddb_obj *obj); + +int ddb_vtp_verify(daos_handle_t poh, struct dv_tree_path *vtp); + +#endif /* DAOS_DDB_VOS_H */ diff --git a/src/ddb/tests/README.md b/src/ddb/tests/README.md new file mode 100644 index 00000000000..6642def9a0e --- /dev/null +++ b/src/ddb/tests/README.md @@ -0,0 +1,24 @@ +# ddb_tests + +The ddb_tests executable will test the ddb utility at multiple levels. 
There is +a different test suite for each of the following layers: + +- The parsing tests unit test various utility type functions that are used for + parsing input. +- The cmd options tests unit test that the 'getopt_long' function parameters are + set up correctly for each of the commands. +- The vos interface layer is tested with a vos instance so a mount point must be + set up correctly at /mnt/daos with tmpfs. +- The commands tests verify that the command functions work correctly. Even + though the test suite will set up a vos instance and test the commands and vos + layers together, most of the testing is focused on the commands layer, that + invalid input for options and arguments is handled appropriately, etc. +- The main test suite focuses on the ddb_main function and that the ddb utility + options and arguments are handled appropriately. +- The print test suite looks at how information is printed and whether it seems + correct. + +The ddb_test_driver.c file contains the entry point for ddb_tests. It does not +take any arguments for filtering or modifying tests; however, while debugging, +the "test_suites" variable and the "cmocka_set_test_filter" function can be +used, with a test recompile, to filter which tests are run. 
diff --git a/src/ddb/tests/SConscript b/src/ddb/tests/SConscript new file mode 100644 index 00000000000..05c73bf3031 --- /dev/null +++ b/src/ddb/tests/SConscript @@ -0,0 +1,53 @@ +# pylint: disable-next=wrong-spelling-in-comment +"""Build tests""" + + +def scons(): + """Execute build""" + Import('env', 'prereqs') + + denv = env.Clone() + + # Add runtime paths for daos libraries + denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64']) + + prereqs.require(denv, 'argobots', 'protobufc', 'pmdk', 'spdk') + + # for ddb includes + denv.AppendUnique(CPPPATH=[Dir('../').srcnode()]) + denv.AppendUnique(LIBPATH=[Dir('../')]) + + # Add include directory for vos internal headers + denv.AppendUnique(CPPPATH=[Dir('../../vos/').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('../../vos/')]) + denv.AppendUnique(CPPDEFINES='_GNU_SOURCE') + + libs = ['vos', 'daos_common_pmem', 'abt', 'gurt', 'uuid', 'bio', 'cart', + 'cmocka', 'ddb'] + # spdk libraries + libs += ['spdk_event', 'spdk_log'] + libs += ['spdk_bdev', 'spdk_blob', 'spdk_blob_bdev', 'spdk_json'] + libs += ['spdk_nvme', 'spdk_init', 'spdk_thread', 'spdk_log'] + libs += ['spdk_env_dpdk', 'spdk_thread', 'spdk_bdev', 'rte_mempool'] + libs += ['rte_mempool_ring', 'rte_bus_pci', 'rte_pci', 'rte_ring'] + libs += ['rte_mbuf', 'rte_eal', 'rte_kvargs', 'spdk_bdev_aio'] + libs += ['spdk_bdev_nvme', 'spdk_blob', 'spdk_nvme', 'spdk_util'] + libs += ['spdk_json', 'spdk_jsonrpc', 'spdk_rpc', 'spdk_trace'] + libs += ['spdk_sock', 'spdk_log', 'spdk_notify', 'spdk_blob_bdev'] + libs += ['spdk_vmd', 'spdk_event_bdev', 'spdk_init', 'rte_power'] + src = ['ddb_cmd_options_tests.c', + 'ddb_commands_tests.c', + 'ddb_main_tests.c', + 'ddb_parse_tests.c', + 'ddb_path_tests.c', + 'ddb_test_driver.c', + 'ddb_vos_tests.c', + 'ddb_commands_print_tests.c'] + ddb_tests = denv.d_program('ddb_tests', [src], LIBS=libs) + + denv.Install('$PREFIX/bin/', ddb_tests) + + +if __name__ == "SCons.Script": + scons() 
diff --git a/src/ddb/tests/ddb_cmd_options_tests.c b/src/ddb/tests/ddb_cmd_options_tests.c new file mode 100644 index 00000000000..643815bb629 --- /dev/null +++ b/src/ddb/tests/ddb_cmd_options_tests.c @@ -0,0 +1,327 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" + +#define test_run_inval_cmd(...) \ + assert_rc_equal(-DER_INVAL, __test_run_cmd(NULL, (char *[]){__VA_ARGS__, NULL})) +#define test_run_cmd(ctx, ...) \ + assert_success(__test_run_cmd(ctx, (char *[]){__VA_ARGS__, NULL})) + +static int +fake_print(const char *fmt, ...) +{ + return 0; +} + +static int +__test_run_cmd(struct ddb_cmd_info *info, char *argv[]) +{ + struct argv_parsed parse_args = {0}; + uint32_t argc = 0; + struct ddb_ctx ctx = {0}; + struct ddb_cmd_info tmp_info = {0}; + int rc; + + ctx.dc_io_ft.ddb_print_message = fake_print; + ctx.dc_io_ft.ddb_print_error = fake_print; + if (info == NULL) + info = &tmp_info; + + assert_non_null(argv); + if (g_verbose) + printf("Command: "); + while (argv[argc] != NULL) { + if (g_verbose) + printf("%s ", argv[argc]); + argc++; + } + if (g_verbose) + printf("\n"); + + parse_args.ap_argv = argv; + parse_args.ap_argc = argc; + + rc = ddb_parse_cmd_args(&ctx, parse_args.ap_argc, parse_args.ap_argv, info); + + if (!SUCCESS(rc)) + return rc; + + return rc; +} + +static void +ls_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct ls_options *options = &info.dci_cmd_option.dci_ls; + + /* test invalid arguments and options */ + test_run_inval_cmd("ls", "path", "extra"); /* too many argument */ + test_run_inval_cmd("ls", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "ls", "path"); + assert_non_null(options->path); + assert_false(options->recursive); + assert_false(options->details); + + /* test all options and arguments */ + test_run_cmd(&info, 
"ls", "-r", "-d", "path"); + assert_non_null(options->path); + assert_true(options->recursive); + assert_true(options->details); +} + +static void +open_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct open_options *options = &info.dci_cmd_option.dci_open; + + /* test invalid arguments and options */ + test_run_inval_cmd("open", "path", "extra"); /* too many argument */ + test_run_inval_cmd("open", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "open", "path"); + assert_non_null(options->path); + assert_false(options->write_mode); + + /* test all options and arguments */ + test_run_cmd(&info, "open", "-w", "path"); + assert_non_null(options->path); + assert_true(options->write_mode); +} + +static void +value_dump_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct value_dump_options *options = &info.dci_cmd_option.dci_value_dump; + + /* test invalid arguments and options */ + test_run_inval_cmd("value_dump", "path", "dst", "extra"); /* too many argument */ + test_run_inval_cmd("value_dump", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "value_dump", "path", "dst"); + assert_non_null(options->path); + assert_non_null(options->dst); +} + +static void +rm_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct rm_options *options = &info.dci_cmd_option.dci_rm; + + /* test invalid arguments and options */ + test_run_inval_cmd("rm", "path", "extra"); /* too many argument */ + test_run_inval_cmd("rm", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "rm", "path"); + assert_non_null(options->path); +} + +static void +value_load_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct value_load_options *options = &info.dci_cmd_option.dci_value_load; + + /* test invalid arguments and options */ + test_run_inval_cmd("value_load", "src", "dst", "extra"); /* too many argument */ + 
test_run_inval_cmd("value_load", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "value_load", "src", "dst"); + assert_non_null(options->src); + assert_non_null(options->dst); +} + +static void +ilog_dump_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct ilog_dump_options *options = &info.dci_cmd_option.dci_ilog_dump; + + /* test invalid arguments and options */ + test_run_inval_cmd("ilog_dump", "path", "extra"); /* too many argument */ + test_run_inval_cmd("ilog_dump", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "ilog_dump", "path"); + assert_non_null(options->path); +} + +static void +ilog_commit_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct ilog_commit_options *options = &info.dci_cmd_option.dci_ilog_commit; + + /* test invalid arguments and options */ + test_run_inval_cmd("ilog_commit", "path", "extra"); /* too many argument */ + test_run_inval_cmd("ilog_commit", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "ilog_commit", "path"); + assert_non_null(options->path); +} + +static void +ilog_clear_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct ilog_clear_options *options = &info.dci_cmd_option.dci_ilog_clear; + + /* test invalid arguments and options */ + test_run_inval_cmd("ilog_clear", "path", "extra"); /* too many argument */ + test_run_inval_cmd("ilog_clear", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "ilog_clear", "path"); + assert_non_null(options->path); +} + +static void +dtx_dump_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct dtx_dump_options *options = &info.dci_cmd_option.dci_dtx_dump; + + /* test invalid arguments and options */ + test_run_inval_cmd("dtx_dump", "path", "extra"); /* too many argument */ + test_run_inval_cmd("dtx_dump", "-z"); /* invalid option */ + + /* test all arguments */ + 
test_run_cmd(&info, "dtx_dump", "path"); + assert_non_null(options->path); + assert_false(options->active); + assert_false(options->committed); + + /* test all options and arguments */ + test_run_cmd(&info, "dtx_dump", "-a", "-c", "path"); + assert_non_null(options->path); + assert_true(options->active); + assert_true(options->committed); +} + +static void +dtx_cmt_clear_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct dtx_cmt_clear_options *options = &info.dci_cmd_option.dci_dtx_cmt_clear; + + /* test invalid arguments and options */ + test_run_inval_cmd("dtx_cmt_clear", "path", "extra"); /* too many argument */ + test_run_inval_cmd("dtx_cmt_clear", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "dtx_cmt_clear", "path"); + assert_non_null(options->path); +} + +static void +smd_sync_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct smd_sync_options *options = &info.dci_cmd_option.dci_smd_sync; + + /* test invalid arguments and options */ + test_run_inval_cmd("smd_sync", "nvme_conf", "db_path", "extra"); /* too many argument */ + test_run_inval_cmd("smd_sync", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "smd_sync", "nvme_conf", "db_path"); + assert_non_null(options->nvme_conf); + assert_non_null(options->db_path); +} + +static void +vea_update_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct vea_update_options *options = &info.dci_cmd_option.dci_vea_update; + + /* test invalid arguments and options */ + test_run_inval_cmd("vea_update", "offset", "blk_cnt", "extra"); /* too many argument */ + test_run_inval_cmd("vea_update", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "vea_update", "offset", "blk_cnt"); + assert_non_null(options->offset); + assert_non_null(options->blk_cnt); +} + +static void +dtx_act_commit_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct 
dtx_act_commit_options *options = &info.dci_cmd_option.dci_dtx_act_commit; + + /* test invalid arguments and options */ + test_run_inval_cmd("dtx_act_commit", "path", "dtx_id", "extra"); /* too many argument */ + test_run_inval_cmd("dtx_act_commit", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "dtx_act_commit", "path", "dtx_id"); + assert_non_null(options->path); + assert_non_null(options->dtx_id); +} + +static void +dtx_act_abort_options_parsing(void **state) +{ + struct ddb_cmd_info info = {0}; + struct dtx_act_abort_options *options = &info.dci_cmd_option.dci_dtx_act_abort; + + /* test invalid arguments and options */ + test_run_inval_cmd("dtx_act_abort", "path", "dtx_id", "extra"); /* too many argument */ + test_run_inval_cmd("dtx_act_abort", "-z"); /* invalid option */ + + /* test all arguments */ + test_run_cmd(&info, "dtx_act_abort", "path", "dtx_id"); + assert_non_null(options->path); + assert_non_null(options->dtx_id); +} + +/* + * ----------------------------------------------- + * Execute + * ----------------------------------------------- + */ +#define TEST(x) { #x, x, NULL, NULL } +int +ddb_cmd_options_tests_run() +{ + static const struct CMUnitTest tests[] = { + TEST(ls_options_parsing), + TEST(open_options_parsing), + TEST(value_dump_options_parsing), + TEST(rm_options_parsing), + TEST(value_load_options_parsing), + TEST(ilog_dump_options_parsing), + TEST(ilog_commit_options_parsing), + TEST(ilog_clear_options_parsing), + TEST(dtx_dump_options_parsing), + TEST(dtx_cmt_clear_options_parsing), + TEST(smd_sync_options_parsing), + TEST(vea_update_options_parsing), + TEST(dtx_act_commit_options_parsing), + TEST(dtx_act_abort_options_parsing), + }; + + return cmocka_run_group_tests_name("DDB commands option parsing tests", tests, + NULL, NULL); +} diff --git a/src/ddb/tests/ddb_cmocka.h b/src/ddb/tests/ddb_cmocka.h new file mode 100644 index 00000000000..dd38715e376 --- /dev/null +++ b/src/ddb/tests/ddb_cmocka.h @@ -0,0 
+1,72 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef DAOS_DDB_CMOCKA_H +#define DAOS_DDB_CMOCKA_H +#include +#include +#include +#include + +#define assert_uuid_equal(a, b) \ + do { \ + char str_a[DAOS_UUID_STR_SIZE]; \ + char str_b[DAOS_UUID_STR_SIZE]; \ + uuid_unparse(a, str_a); \ + uuid_unparse(b, str_b); \ + assert_string_equal(str_a, str_b); \ + } while (0) +#define assert_uuid_not_equal(a, b) \ + do { \ + char str_a[DAOS_UUID_STR_SIZE]; \ + char str_b[DAOS_UUID_STR_SIZE]; \ + uuid_unparse(a, str_a); \ + uuid_unparse(b, str_b); \ + assert_string_not_equal(str_a, str_b); \ + } while (0) +#define assert_oid_equal(a, b) \ + do { \ + assert_int_equal((a).hi, (b).hi); \ + assert_int_equal((a).lo, (b).lo); \ + } while (0) + +#define assert_uoid_equal(a, b) \ + do { \ + assert_oid_equal((a).id_pub, (b).id_pub); \ + assert_int_equal((a).id_shard, (b).id_shard); \ + assert_int_equal((a).id_layout_ver, (b).id_layout_ver); \ + } while (0) + +#define assert_oid_not_equal(a, b) assert_true(a.hi != b.hi || a.lo != b.lo) + +#define assert_key_equal(a, b) \ + do { \ + assert_int_equal(a.iov_len, b.iov_len); \ + assert_memory_equal(a.iov_buf, b.iov_buf, a.iov_len); \ + } while (0) + +#define assert_key_not_equal(a, b) \ + do { \ + if (a.iov_len == b.iov_len && a.iov_buf_len == b.iov_buf_len) \ + assert_memory_not_equal(a.iov_buf, b.iov_buf, a.iov_len); \ + } while (0) + +#define assert_recx_equal(a, b) \ + do { \ + assert_int_equal((a).rx_nr, (b).rx_nr); \ + assert_int_equal((a).rx_idx, (b).rx_idx); \ + } while (0) + +#define assert_string_contains(str, substr) \ + do { \ + if (strstr(str, substr) == NULL) \ + fail_msg("'%s' not found in '%s'", substr, str); \ + } while (0) + +#define assert_invalid(x) assert_rc_equal(-DER_INVAL, (x)) +#define assert_nonexist(x) assert_rc_equal(-DER_NONEXIST, (x)) + +#endif /* DAOS_DDB_CMOCKA_H */ diff --git a/src/ddb/tests/ddb_commands_print_tests.c 
b/src/ddb/tests/ddb_commands_print_tests.c new file mode 100644 index 00000000000..5b0d1673816 --- /dev/null +++ b/src/ddb/tests/ddb_commands_print_tests.c @@ -0,0 +1,356 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" + +static struct ddb_ctx g_ctx = {.dc_io_ft.ddb_print_message = dvt_fake_print}; + +static void +print_container_test(void **state) +{ + struct ddb_cont cont = {0}; + + uuid_parse("12345678-1234-1243-1243-123456789012", cont.ddbc_cont_uuid); + cont.ddbc_idx = 1; + + ddb_print_cont(&g_ctx, &cont); + assert_printed_exact("[1] 12345678-1234-1243-1243-123456789012\n"); +} + +static void +print_object_test(void **state) +{ + struct ddb_obj obj = {0}; + + obj.ddbo_idx = 2; + obj.ddbo_oid.lo = 1; + obj.ddbo_oid.hi = 10; + obj.ddbo_nr_grps = 2; + strcpy(obj.ddbo_otype_str, "TEST TYPE"); + + ddb_print_obj(&g_ctx, &obj, 1); + + assert_printed_exact(" [2] '10.1' (type: TEST TYPE, groups: 2)\n"); +} + +static void set_key_buf(struct ddb_key *key, uint32_t len) +{ + int i; + + for (i = 0; i < len; i++) + ((uint8_t *)key->ddbk_key.iov_buf)[i] = (i % 16 + 0x1); + + key->ddbk_key.iov_len = len; +} + +static void +print_key_test(void **state) +{ + struct ddb_key key = {0}; + char key_buf[1024] = {0}; + uint64_t ll = 0x1abc2abc3abc4abc; + int i = 0x1234abcd; + short s = 0xabcd; + + key.ddbk_idx = 4; + d_iov_set(&key.ddbk_key, key_buf, ARRAY_SIZE(key_buf)); + + ddb_print_key(&g_ctx, &key, 0); + + /* empty large key */ + assert_printed_exact("[4] '' (1024)\n"); + dvt_fake_print_reset(); + + /* Large key buffer, but only part is text */ + strcpy(key_buf, "string key"); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] 'string key' (1024)\n"); + dvt_fake_print_reset(); + + /* No ending '\0' */ + strcpy(key_buf, "abcdefghijklmnopqrstuvwxyz"); + key.ddbk_key.iov_len = 5; + ddb_print_key(&g_ctx, &key, 0); + 
assert_printed_exact("[4] 'abcde' (5)\n"); + dvt_fake_print_reset(); + + /* With ending '\0' in middle ... only prints to null terminator */ + strcpy(key_buf, "abcdefghijklmnopqrstuvwxyz"); + key_buf[10] = '\0'; + key.ddbk_key.iov_len = 26; + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] 'abcdefghij' (26)\n"); + dvt_fake_print_reset(); + + /* + * Print binary keys. + * If key length is a number type, then print as that. + */ + memset(key_buf, 0, ARRAY_SIZE(key_buf)); + + /* char key */ + key_buf[0] = 0xab; + key.ddbk_key.iov_len = sizeof(char); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {uint8:0xab}\n"); + dvt_fake_print_reset(); + + /* short key */ + key.ddbk_key.iov_buf = (uint8_t *)&s; + key.ddbk_key.iov_len = sizeof(short); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {uint16:0xabcd}\n"); + dvt_fake_print_reset(); + + /* int key */ + key.ddbk_key.iov_buf = (int *)&i; + key.ddbk_key.iov_len = sizeof(int); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {uint32:0x1234abcd}\n"); + dvt_fake_print_reset(); + + /* 64 bit key */ + key.ddbk_key.iov_buf = (uint64_t *)&ll; + key.ddbk_key.iov_len = sizeof(uint64_t); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {uint64:0x1abc2abc3abc4abc}\n"); + dvt_fake_print_reset(); + + /* random length binary key */ + key_buf[0] = 0xaa; + key_buf[1] = 0xbb; + key_buf[2] = 0xcc; + key.ddbk_key.iov_buf = key_buf; + key.ddbk_key.iov_len = 3; + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {bin(3):0xaabbcc}\n"); + dvt_fake_print_reset(); + + set_key_buf(&key, 12); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {bin(12):0x0102030405060708090a0b0c}\n"); + dvt_fake_print_reset(); + + set_key_buf(&key, 128); + ddb_print_key(&g_ctx, &key, 0); + assert_printed_exact("[4] {bin(128):0x0102030405060708090a0b0c0d0e0f1001020304050607080...}" + "\n"); + dvt_fake_print_reset(); +} + +static void +print_sv_test(void **state) +{ + struct ddb_sv sv = 
{.ddbs_record_size = 19089555}; + + ddb_print_sv(&g_ctx, &sv, 0); + assert_printed_exact("[0] Single Value (Length: 19089555 bytes)\n"); +} + +static void +print_array_test(void **state) +{ + struct ddb_array array = { + .ddba_recx.rx_idx = 64, + .ddba_recx.rx_nr = 128, + .ddba_record_size = 3, + .ddba_idx = 8, + }; + + ddb_print_array(&g_ctx, &array, 0); + assert_printed_exact("[8] Array Value (Length: 128 records, " + "Record Indexes: {64-191}, Record Size: 3)\n"); +} + +#define assert_hr_bytes(expected_str, bytes) \ + do { \ + uint32_t __buf_len = 32; \ + char __buf[__buf_len]; \ + ddb_bytes_hr(bytes, __buf, __buf_len); \ + assert_string_equal(expected_str, __buf); \ + } while (0) + +static void +bytes_hr_tests(void **state) +{ + assert_hr_bytes("1KB", 1024); + assert_hr_bytes("1KB", 1025); + assert_hr_bytes("1KB", 1025); + assert_hr_bytes("1KB", 1024 + 50); + assert_hr_bytes("2KB", 1024 * 2); + assert_hr_bytes("1MB", 1024 * 1024); + assert_hr_bytes("1GB", 1024 * 1024 * 1024); + assert_hr_bytes("1TB", 0x10000000000); +} + +static void +print_superblock_test(void **state) +{ + struct ddb_superblock sb = { + .dsb_scm_sz = 0x100000000, /* 4 GB */ + .dsb_nvme_sz = 0x40000000000, /* 4 TB */ + .dsb_cont_nr = 2, + .dsb_durable_format_version = 23, + .dsb_blk_sz = 4096, + .dsb_hdr_blks = 1024, + .dsb_tot_blks = 0x40000000000, + }; + + uuid_parse("12345678-1234-1234-1234-123456789012", sb.dsb_id); + + ddb_print_superblock(&g_ctx, &sb); + + assert_printed_contains("Pool UUID: 12345678-1234-1234-1234-123456789012\n"); + assert_printed_contains("Format Version: 23\n"); + assert_printed_contains("Containers: 2\n"); + assert_printed_contains("SCM Size: 4GB\n"); + assert_printed_contains("NVME Size: 4TB\n"); + assert_printed_contains("Block Size: 4KB\n"); + assert_printed_contains("Reserved Blocks: 1024\n"); + assert_printed_contains("Block Device Capacity: 4TB\n"); +} + +static void +print_ilog_test(void **state) +{ + struct ddb_ilog_entry ilog = { + .die_status = 1, + 
.die_status_str = "TEST STATUS", + .die_epoch = 1234567890, + .die_idx = 1, + .die_tx_id = 2 + }; + + ddb_print_ilog_entry(&g_ctx, &ilog); + + assert_printed_contains("Index: 1\n"); + assert_printed_contains("Status: TEST STATUS (1)\n"); + assert_printed_contains("Epoch: 1234567890\n"); + assert_printed_contains("Txn ID: 2\n"); +} + +static void +print_dtx_active_test(void **state) +{ + struct dv_dtx_active_entry entry = { + .ddtx_id = {.dti_uuid = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}, .dti_hlc = 0x1234}, + .ddtx_handle_time = 12345690, + .ddtx_epoch = 99, + .ddtx_grp_cnt = 3, + .ddtx_ver = 1, + .ddtx_rec_cnt = 1, + .ddtx_mbs_flags = 1, + .ddtx_flags = 0, + .ddtx_oid = g_oids[0], + }; + + ddb_print_dtx_active(&g_ctx, &entry); + + assert_printed_contains("ID: 12345678-9abc-0000-0000-000000000000.1234\n"); + assert_printed_contains("Epoch: 99\n"); + assert_printed_contains("Handle Time: 12345690\n"); + assert_printed_contains("Grp Cnt: 3\n"); + assert_printed_contains("Ver: 1\n"); + assert_printed_contains("Rec Cnt: 1\n"); + assert_printed_contains("Mbs Flags: 1\n"); + assert_printed_contains("Flags: 0\n"); + assert_printed_contains("Oid: 281479271743488.4294967296.0.0\n"); +} + +static void +print_dtx_committed_test(void **state) +{ + struct dv_dtx_committed_entry entry = { + .ddtx_epoch = 1234, + .ddtx_id = {.dti_uuid = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}, .dti_hlc = 0x1234}, + }; + + ddb_print_dtx_committed(&g_ctx, &entry); + + assert_printed_contains("ID: 12345678-9abc-0000-0000-000000000000.1234\n"); + assert_printed_contains("Epoch: 1234\n"); +} + +static void +iov_to_printable_test(void **state) +{ + d_iov_t iov = {0}; + uint32_t buf_len = 64; + char buf[buf_len]; + char input_buf[buf_len]; + + assert_int_equal(0, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + + /* buf is plenty big */ + sprintf(input_buf, "This is some text"); + d_iov_set(&iov, input_buf, strlen(input_buf) + 1); + assert_int_equal(17, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + 
assert_string_equal(input_buf, buf); + + /* buf is too small */ + memset(buf, 0, buf_len); + assert_int_equal(17, ddb_iov_to_printable_buf(&iov, buf, 10)); + assert_string_equal("This is s", buf); + + /* Binary type - enough buffer*/ + memset(input_buf, 0xab, buf_len); + d_iov_set(&iov, input_buf, 10); + /* chars written to buffer is 30. For each byte, 2 are printed (10 bytes * 2) plus + * the prefix of 'bin(10):' is 10 more chars. + */ + assert_int_equal(30, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + assert_string_equal("bin(10):0xabababababababababab", buf); + + /* Binary type - not enough buffer*/ + assert_int_equal(30, ddb_iov_to_printable_buf(&iov, buf, 20)); + assert_string_equal("bin(10):0xababab...", buf); + + /* Number types */ + d_iov_set(&iov, input_buf, 8); /* uint64 */ + assert_int_equal(25, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + assert_string_equal("uint64:0xabababababababab", buf); + + assert_int_equal(25, ddb_iov_to_printable_buf(&iov, buf, 10)); + assert_string_equal("uint64:0x", buf); + + d_iov_set(&iov, input_buf, 4); /* uint32 */ + assert_int_equal(17, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + assert_string_equal("uint32:0xabababab", buf); + + d_iov_set(&iov, input_buf, 1); /* uint8 */ + assert_int_equal(10, ddb_iov_to_printable_buf(&iov, buf, buf_len)); + assert_string_equal("uint8:0xab", buf); +} + +static int +ddb_print_setup(void **state) +{ + dvt_fake_print_reset(); + return 0; +} + +#define TEST(x) { #x, x, ddb_print_setup, NULL } +static const struct CMUnitTest tests[] = { + TEST(print_container_test), + TEST(print_object_test), + TEST(print_key_test), + TEST(print_sv_test), + TEST(print_array_test), + TEST(bytes_hr_tests), + TEST(print_superblock_test), + TEST(print_ilog_test), + TEST(print_dtx_active_test), + TEST(print_dtx_committed_test), + TEST(iov_to_printable_test), +}; + +int +ddb_commands_print_tests_run() +{ + return cmocka_run_group_tests_name("ddb commands printer", tests, NULL, NULL); +} diff --git 
a/src/ddb/tests/ddb_commands_tests.c b/src/ddb/tests/ddb_commands_tests.c new file mode 100644 index 00000000000..bf723eeb2e4 --- /dev/null +++ b/src/ddb/tests/ddb_commands_tests.c @@ -0,0 +1,423 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include +#include +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" + +/* + * Test that the command line arguments execute the correct tool command with the correct + * options/arguments for the command. Verification depends on the ability to set fake command + * functions in a command function table that the program uses. + */ + +struct ddb_ctx g_ctx = { + .dc_io_ft.ddb_print_message = dvt_fake_print, + .dc_io_ft.ddb_print_error = dvt_fake_print, + .dc_io_ft.ddb_read_file = dvt_fake_read_file, + .dc_io_ft.ddb_get_file_size = dvt_fake_get_file_size, + .dc_io_ft.ddb_get_file_exists = dvt_fake_get_file_exists, + .dc_write_mode = true, +}; + +static uint32_t fake_write_file_called; +static int +fake_write_file(const char *path, d_iov_t *contents) +{ + fake_write_file_called++; + + return 0; +} + +/* + * ----------------------------------------------- + * Test Functions + * ----------------------------------------------- + */ + +static void +quit_cmd_tests(void **state) +{ + /* Quit is really simple and should just indicate to the program context that it's + * time to quit + */ + assert_success(ddb_run_quit(&g_ctx)); + assert_true(g_ctx.dc_should_quit); +} + +static void +ls_cmd_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct ddb_ctx ctx = {0}; + struct ls_options opt = {.recursive = false, .path = ""}; + int items_in_tree; + char buf[256]; + + ctx.dc_poh = tctx->dvt_poh; + ctx.dc_io_ft.ddb_print_message = dvt_fake_print; + ctx.dc_io_ft.ddb_print_error = dvt_fake_print; + assert_success(ddb_run_ls(&ctx, &opt)); + + /* At least each container should be printed */ + assert_success(ddb_run_ls(&ctx, 
&opt)); + assert_true(ARRAY_SIZE(g_uuids) <= dvt_fake_print_called); + + /* With recursive set, every item in the tree should be printed, this gets huge so turn + * off storing it in the fake print buffer. + */ + dvt_fake_print_just_count = true; + opt.recursive = true; + items_in_tree = ARRAY_SIZE(g_uuids) * ARRAY_SIZE(g_oids) * + ARRAY_SIZE(g_dkeys) * ARRAY_SIZE(g_akeys); + dvt_fake_print_called = 0; + assert_success(ddb_run_ls(&ctx, &opt)); + assert_true(items_in_tree <= dvt_fake_print_called); + dvt_fake_print_just_count = false; + + /* pick a specific oid - each dkey should be printed */ + opt.path = "[0]/[0]"; + opt.recursive = false; + assert_success(ddb_run_ls(&ctx, &opt)); + assert_true(ARRAY_SIZE(g_dkeys) <= dvt_fake_print_called); + + /* printing a recx works */ + dvt_fake_print_called = 0; + opt.path = "/[0]/[0]/[0]/[0]/[0]"; + opt.recursive = true; + assert_success(ddb_run_ls(&ctx, &opt)); + + /* invalid paths ... */ + opt.path = buf; + + sprintf(buf, "%s", g_invalid_uuid_str); + assert_invalid(ddb_run_ls(&ctx, &opt)); + sprintf(buf, "%s/"DF_OID"/", g_uuids_str[0], DP_OID(g_invalid_oid.id_pub)); + assert_invalid(ddb_run_ls(&ctx, &opt)); + dvt_fake_print_reset(); + + opt.path = "/[0]/[1]/dkey-3"; + opt.recursive = true; + assert_success(ddb_run_ls(&ctx, &opt)); + assert_printed_contains("dkey-3"); + + opt.path = "/[0]"; + opt.recursive = false; + /* The output of this command will show which object ID to use for the next one. Can + * use g_verbose=true; to see output. Right now kind of manual, but when json output is + * implemented, might be able to automate this a little better. 
+ */ + assert_success(ddb_run_ls(&ctx, &opt)); + dvt_fake_print_reset(); + opt.path = "/[0]/[0]"; + assert_success(ddb_run_ls(&ctx, &opt)); + g_verbose = false; + assert_printed_contains("/12345678-1234-1234-1234-123456789001/" + "281479271743488.4294967296.0.0"); +} + +static void +dump_value_cmd_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct ddb_ctx ctx = {0}; + struct value_dump_options opt = {0}; + + ctx.dc_io_ft.ddb_print_message = dvt_fake_print; + ctx.dc_io_ft.ddb_print_error = dvt_fake_print; + ctx.dc_io_ft.ddb_write_file = fake_write_file; + ctx.dc_poh = tctx->dvt_poh; + + /* requires a path to dump */ + assert_invalid(ddb_run_value_dump(&ctx, &opt)); + + /* path must be complete (to a value) */ + opt.path = "[0]"; + assert_rc_equal(ddb_run_value_dump(&ctx, &opt), -DDBER_INCOMPLETE_PATH_VALUE); + + /* Path is complete, no destination means will dump to screen */ + opt.path = "[0]/[0]/[0]/[1]"; + assert_success(ddb_run_value_dump(&ctx, &opt)); + + /* success */ + opt.dst = "/tmp/dumped_file"; + assert_success(ddb_run_value_dump(&ctx, &opt)); + assert_true(fake_write_file_called >= 1); +} + +static void +dump_ilog_cmd_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct ddb_ctx ctx = {0}; + struct ilog_dump_options opt = {0}; + + ctx.dc_io_ft.ddb_print_message = dvt_fake_print; + ctx.dc_io_ft.ddb_print_error = dvt_fake_print; + ctx.dc_io_ft.ddb_write_file = fake_write_file; + ctx.dc_poh = tctx->dvt_poh; + + assert_invalid(ddb_run_ilog_dump(&ctx, &opt)); + + /* Dump object ilog */ + dvt_fake_print_called = 0; + opt.path = "[0]/[0]"; + assert_success(ddb_run_ilog_dump(&ctx, &opt)); + assert_true(dvt_fake_print_called); + + /* Dump dkey ilog */ + dvt_fake_print_called = 0; + opt.path = "[0]/[0]/[0]"; + assert_success(ddb_run_ilog_dump(&ctx, &opt)); + assert_true(dvt_fake_print_called); + + /* Dump akey ilog */ + opt.path = "[0]/[0]/[0]/[0]"; + assert_success(ddb_run_ilog_dump(&ctx, &opt)); +} + +static void 
+dump_superblock_cmd_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct ddb_ctx ctx = {0}; + + ctx.dc_io_ft.ddb_print_message = dvt_fake_print; + ctx.dc_poh = tctx->dvt_poh; + + ddb_run_superblock_dump(&ctx); + + assert_true(dvt_fake_print_called >= 1); /* Should have printed at least once */ +} + +static void +dump_dtx_cmd_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct ddb_ctx ctx = {0}; + struct dtx_dump_options opt = {0}; + daos_handle_t coh; + + dvt_fake_print_reset(); + + ctx.dc_io_ft.ddb_print_message = dvt_fake_print; + ctx.dc_io_ft.ddb_print_error = dvt_fake_print; + ctx.dc_poh = tctx->dvt_poh; + + assert_invalid(ddb_run_dtx_dump(&ctx, &opt)); + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + + dvt_vos_insert_2_records_with_dtx(coh); + vos_cont_close(coh); + + opt.path = "[0]"; + assert_success(ddb_run_dtx_dump(&ctx, &opt)); + + assert_string_contains(dvt_fake_print_buffer, "Active Transactions:"); + assert_string_contains(dvt_fake_print_buffer, "Committed Transactions:"); +} + +static void +rm_cmd_tests(void **state) +{ + struct rm_options opt = {0}; + + assert_invalid(ddb_run_rm(&g_ctx, &opt)); + + dvt_fake_print_reset(); + opt.path = "[0]"; + assert_success(ddb_run_rm(&g_ctx, &opt)); + assert_string_equal(dvt_fake_print_buffer, + "CONT: (/[0]) /12345678-1234-1234-1234-123456789001 deleted\n"); +} + +static void +load_cmd_tests(void **state) +{ + struct value_load_options opt = {0}; + char buf[256]; + daos_unit_oid_t new_oid = g_oids[0]; + + assert_invalid(ddb_run_value_load(&g_ctx, &opt)); + + opt.dst = "/[0]/[0]/[0]/[1]"; + opt.src = "/tmp/value_src"; + dvt_fake_get_file_exists_result = true; + snprintf(dvt_fake_read_file_buf, ARRAY_SIZE(dvt_fake_read_file_buf), "Some text"); + assert_invalid(ddb_run_value_load(&g_ctx, &opt)); + dvt_fake_get_file_size_result = strlen(dvt_fake_read_file_buf); + dvt_fake_read_file_result = strlen(dvt_fake_read_file_buf); + 
assert_success(ddb_run_value_load(&g_ctx, &opt)); + + /* add a new 'a' key */ + opt.dst = "/[0]/[0]/[0]/a-new-key"; + assert_success(ddb_run_value_load(&g_ctx, &opt)); + + /* add a new 'd' key */ + opt.dst = "/[0]/[0]/a-new-key/a-new-key"; + assert_success(ddb_run_value_load(&g_ctx, &opt)); + + /* add a new object */ + new_oid.id_pub.lo = 999; + sprintf(buf, "%s/"DF_UOID"/dkey_new/akey_new", g_uuids_str[3], DP_UOID(new_oid)); + opt.dst = buf; + assert_success(ddb_run_value_load(&g_ctx, &opt)); + + /* + * Error cases ... + */ + + /* File not found */ + dvt_fake_get_file_exists_result = false; + assert_invalid(ddb_run_value_load(&g_ctx, &opt)); + dvt_fake_get_file_exists_result = true; + + /* incomplete path */ + opt.dst = "/[0]/[0]/"; + assert_invalid(ddb_run_value_load(&g_ctx, &opt)); + + /* Can't use index for a new path */ + opt.dst = "/[0]/[0]/[0]/[9999]"; + assert_rc_equal(-DER_INVAL, ddb_run_value_load(&g_ctx, &opt)); + + /* can't create new container */ + sprintf(buf, "%s/"DF_OID"/'dkey_new'/'akey_new'", g_invalid_uuid_str, + DP_OID(g_oids[0].id_pub)); + opt.dst = buf; + assert_rc_equal(-DDBER_INVALID_CONT, ddb_run_value_load(&g_ctx, &opt)); +} + +static void +rm_ilog_cmd_tests(void **state) +{ + struct ilog_clear_options opt = {0}; + + assert_invalid(ddb_run_ilog_clear(&g_ctx, &opt)); + opt.path = "[0]"; /* just container ... bad */ + assert_invalid(ddb_run_ilog_clear(&g_ctx, &opt)); + + opt.path = "[1]/[0]"; /* object */ + assert_success(ddb_run_ilog_clear(&g_ctx, &opt)); + opt.path = "[2]/[0]/[0]"; /* dkey */ + assert_success(ddb_run_ilog_clear(&g_ctx, &opt)); +} + +static void +process_ilog_cmd_tests(void **state) +{ + struct ilog_commit_options opt = {0}; + + assert_invalid(ddb_run_ilog_commit(&g_ctx, &opt)); + opt.path = "[0]"; /* just container ... 
bad */ + assert_invalid(ddb_run_ilog_commit(&g_ctx, &opt)); + + opt.path = "[1]/[0]"; /* object */ + assert_success(ddb_run_ilog_commit(&g_ctx, &opt)); + opt.path = "[2]/[0]/[0]"; /* dkey */ + assert_success(ddb_run_ilog_commit(&g_ctx, &opt)); +} + +static void +clear_cmt_dtx_cmd_tests(void **state) +{ + struct dtx_cmt_clear_options opt = {0}; + + assert_invalid(ddb_run_dtx_cmt_clear(&g_ctx, &opt)); + + opt.path = "[0]"; + assert_success(ddb_run_dtx_cmt_clear(&g_ctx, &opt)); +} + +static void +dtx_commit_entry_tests(void **state) +{ + struct dtx_act_commit_options opt = {0}; + + assert_invalid(ddb_run_dtx_act_commit(&g_ctx, &opt)); + opt.path = "[0]/[0]"; + assert_invalid(ddb_run_dtx_act_commit(&g_ctx, &opt)); + + opt.dtx_id = "12345678-1234-1234-1234-123456789012.1234"; + assert_success(ddb_run_dtx_act_commit(&g_ctx, &opt)); +} + +static void +dtx_abort_entry_tests(void **state) +{ + struct dtx_act_abort_options opt = {0}; + + assert_invalid(ddb_run_dtx_act_abort(&g_ctx, &opt)); + + opt.path = "[0]/[0]"; + assert_invalid(ddb_run_dtx_act_abort(&g_ctx, &opt)); + opt.dtx_id = "12345678-1234-1234-1234-123456789012.1234"; + assert_success(ddb_run_dtx_act_abort(&g_ctx, &opt)); +} + +/* + * -------------------------------------------------------------- + * End test functions + * -------------------------------------------------------------- + */ + +static int +dcv_suit_setup(void **state) +{ + struct dt_vos_pool_ctx *tctx; + + assert_success(ddb_test_setup_vos(state)); + + /* test setup creates the pool, but doesn't open it ... 
leave it open for these tests */ + tctx = *state; + assert_success(dv_pool_open(tctx->dvt_pmem_file, &tctx->dvt_poh)); + + g_ctx.dc_poh = tctx->dvt_poh; + + return 0; +} + +static int +dcv_suit_teardown(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + if (tctx == NULL) + fail_msg("Test not setup correctly"); + assert_success(dv_pool_close(tctx->dvt_poh)); + ddb_teardown_vos(state); + + return 0; +} + +#define TEST(test) { #test, test, NULL, NULL } + +int +ddb_commands_tests_run() +{ + const struct CMUnitTest tests[] = { + TEST(quit_cmd_tests), + TEST(ls_cmd_tests), + TEST(dump_value_cmd_tests), + TEST(dump_ilog_cmd_tests), + TEST(dump_superblock_cmd_tests), + TEST(dump_dtx_cmd_tests), + TEST(rm_cmd_tests), + TEST(load_cmd_tests), + TEST(rm_ilog_cmd_tests), + TEST(process_ilog_cmd_tests), + TEST(clear_cmt_dtx_cmd_tests), + TEST(dtx_commit_entry_tests), + TEST(dtx_abort_entry_tests), + }; + + return cmocka_run_group_tests_name("DDB commands tests", tests, + dcv_suit_setup, dcv_suit_teardown); +} diff --git a/src/ddb/tests/ddb_main_tests.c b/src/ddb/tests/ddb_main_tests.c new file mode 100644 index 00000000000..cc3830de184 --- /dev/null +++ b/src/ddb/tests/ddb_main_tests.c @@ -0,0 +1,277 @@ +/** + * (C) Copyright 2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" + +/* + * Test that the command line interface interacts with a 'user' correctly. Will verify that the + * command line options and arguments are handled correctly and the interactive mode. + */ + +uint32_t fake_get_input_called; +int fake_get_input_inputs_count; +int fake_get_input_inputs_idx; +char fake_get_input_inputs[64][64]; + +#define set_fake_inputs(...) 
__set_fake_inputs((char *[]){__VA_ARGS__, NULL}) +static inline void +__set_fake_inputs(char *inputs[]) +{ + int i = 0; + + while (inputs[i] != NULL) { + /* input from user will always have a new line at the end */ + sprintf(fake_get_input_inputs[i], "%s\n", inputs[i]); + i++; + } + fake_get_input_inputs_count = i; + fake_get_input_inputs_idx = 0; +} + +static char * +fake_get_input(char *buf, uint32_t buf_len) +{ + char *input; + + assert_true(fake_get_input_inputs_idx < ARRAY_SIZE(fake_get_input_inputs)); + input = fake_get_input_inputs[fake_get_input_inputs_idx++]; + assert_true(strlen(input) < buf_len); + + strcpy(buf, input); + fake_get_input_called++; + + return input; +} + +int dvt_fake_get_lines_result; +int dvt_fake_get_lines_called; +int +dvt_fake_get_lines(const char *path, ddb_io_line_cb line_cb, void *cb_args) +{ + int i; + int rc; + + dvt_fake_get_lines_called++; + + for (i = 0; i < fake_get_input_inputs_count; i++) { + rc = line_cb(cb_args, fake_get_input_inputs[i], strlen(fake_get_input_inputs[i])); + if (rc != 0) + return rc; + } + + + return dvt_fake_get_lines_result; +} + +#define assert_main(...) \ + assert_success(__test_run_main((char *[]){"prog_name", __VA_ARGS__, NULL})) +#define assert_invalid_main(...) 
\ + assert_rc_equal(-DER_INVAL, __test_run_main((char *[]){"prog_name", __VA_ARGS__, NULL})) + +static int +__test_run_main(char *argv[]) +{ + uint32_t argc = 0; + struct ddb_io_ft ft = { + .ddb_print_message = dvt_fake_print, + .ddb_print_error = dvt_fake_print, + .ddb_get_input = fake_get_input, + .ddb_read_file = dvt_fake_read_file, + .ddb_get_file_exists = dvt_fake_get_file_exists, + .ddb_get_file_size = dvt_fake_get_file_size, + .ddb_get_lines = dvt_fake_get_lines + }; + + assert_non_null(argv); + if (g_verbose) + printf("Command: "); + while (argv[argc] != NULL && strcmp(argv[argc], "") != 0) { + if (g_verbose) + printf("%s ", argv[argc]); + argc++; + } + if (g_verbose) + printf("\n"); + + return ddb_main(&ft, argc, argv); +} + +#define assert_main_interactive_with_input(...) \ + __assert_main_interactive_with_input((char *[]) {__VA_ARGS__, NULL}) +static void +__assert_main_interactive_with_input(char *inputs[]) +{ + __set_fake_inputs(inputs); + assert_main(""); +} + +/* + * ----------------------------------------------- + * Test Functions + * ----------------------------------------------- + */ + +static void +interactive_mode_tests(void **state) +{ + assert_main_interactive_with_input("quit"); + assert_int_equal(1, fake_get_input_called); + + fake_get_input_called = 0; + assert_main_interactive_with_input("ls", "ls", "quit"); + assert_int_equal(3, fake_get_input_called); + + assert_invalid_main("path", "invalid_extra_arg"); +} + +static void +run_inline_command_with_opt_r_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + assert_main(tctx->dvt_pmem_file, "-R", "ls [0] -r"); +} + +static void +only_modify_with_option_w_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + +#define assert_requires_write_mode(cmd) \ +do { \ + assert_invalid_main(tctx->dvt_pmem_file, "-R", cmd); \ + assert_main(tctx->dvt_pmem_file, "-w", "-R", cmd); \ +} while (0) + + dvt_fake_print_reset(); + assert_requires_write_mode("rm [0]"); + + /* Set up 
test for the load command */ + dvt_fake_get_file_exists_result = true; + dvt_fake_get_file_size_result = 10; + dvt_fake_read_file_result = dvt_fake_get_file_size_result; + assert_requires_write_mode("value_load src [0]/[0]/[0]/[1]"); + + assert_requires_write_mode("dtx_cmt_clear [0]"); +} + +static void +run_many_commands_with_option_f_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + /* file doesn't exist */ + dvt_fake_get_file_exists_result = false; + assert_invalid_main(tctx->dvt_pmem_file, "-f", "file_path"); + + /* Empty file is still success */ + dvt_fake_get_file_exists_result = true; + assert_main(tctx->dvt_pmem_file, "-f", "file_path"); + + /* one command */ + dvt_fake_get_lines_called = 0; + assert_main(tctx->dvt_pmem_file, "-f", "file_path"); + assert_int_equal(1, dvt_fake_get_lines_called); + + /* handles invalid commands */ + dvt_fake_get_file_exists_result = true; + set_fake_inputs("bad_command"); + assert_invalid_main(tctx->dvt_pmem_file, "-f", "file_path"); + + /* multiple lines/commands */ + dvt_fake_get_file_exists_result = true; + dvt_fake_get_lines_called = 0; + set_fake_inputs("ls", "superblock_dump", "ls [0]"); + assert_main(tctx->dvt_pmem_file, "-f", "file_path"); + assert_int_equal(1, dvt_fake_get_lines_called); + + /* empty lines are ignored */ + dvt_fake_get_file_exists_result = true; + dvt_fake_get_lines_called = 0; + set_fake_inputs("ls", "", "superblock_dump"); + assert_main(tctx->dvt_pmem_file, "-f", "file_path"); + assert_int_equal(1, dvt_fake_get_lines_called); + + /* Lines with just whitespace are ignored */ + dvt_fake_get_file_exists_result = true; + dvt_fake_get_lines_called = 0; + set_fake_inputs("ls", "\t \t \t\n", "superblock_dump", "\n"); + assert_main(tctx->dvt_pmem_file, "-f", "file_path"); + assert_int_equal(1, dvt_fake_get_lines_called); + + /* commands that modify tree must have '-w' also */ + dvt_fake_get_file_exists_result = true; + set_fake_inputs("ls", "rm [0]"); + 
assert_invalid_main(tctx->dvt_pmem_file, "-f", "file_path"); + assert_main(tctx->dvt_pmem_file, "-w", "-f", "file_path"); +} + +static void +option_f_and_option_R_is_invalid_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + /* Make sure that the fakes are setup to work so they are not invalid */ + set_fake_inputs("ls"); + dvt_fake_get_file_exists_result = true; + + assert_invalid_main(tctx->dvt_pmem_file, "-R", "ls", "-f", "file_path"); +} + +static void +get_help_tests(void **state) +{ + assert_main("-R", "help"); + assert_main("-h"); +} + +static int +ddb_main_suit_setup(void **state) +{ + struct dt_vos_pool_ctx *tctx; + + assert_success(ddb_test_setup_vos(state)); + + /* test setup creates the pool, but doesn't open it ... leave it open for these tests */ + tctx = *state; + assert_success(dv_pool_open(tctx->dvt_pmem_file, &tctx->dvt_poh)); + + return 0; +} + +static int +ddb_main_suit_teardown(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + if (tctx == NULL) + fail_msg("Test not setup correctly"); + assert_success(dv_pool_close(tctx->dvt_poh)); + ddb_teardown_vos(state); + + return 0; +} + +#define TEST(x) { #x, x, NULL, NULL } +int +ddb_main_tests_run() +{ + static const struct CMUnitTest tests[] = { + TEST(interactive_mode_tests), + TEST(run_inline_command_with_opt_r_tests), + TEST(only_modify_with_option_w_tests), + TEST(run_many_commands_with_option_f_tests), + TEST(option_f_and_option_R_is_invalid_tests), + TEST(get_help_tests), + }; + + return cmocka_run_group_tests_name("DDB CLI tests", tests, ddb_main_suit_setup, + ddb_main_suit_teardown); +} diff --git a/src/ddb/tests/ddb_parse_tests.c b/src/ddb/tests/ddb_parse_tests.c new file mode 100644 index 00000000000..ea6c0749252 --- /dev/null +++ b/src/ddb/tests/ddb_parse_tests.c @@ -0,0 +1,346 @@ +/** + * (C) Copyright 2022-2023 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" + +static int +fake_print(const char *fmt, ...) +{ + return 0; +} + +#define assert_parsed_words2(str, count, ...) \ + __assert_parsed_words2(str, count, (char *[])__VA_ARGS__) +static void +__assert_parsed_words2(const char *str, int count, char **expected_words) +{ + struct argv_parsed parse_args = {0}; + int i; + + assert_success(ddb_str2argv_create(str, &parse_args)); + assert_int_equal(count, parse_args.ap_argc); + + for (i = 0; i < parse_args.ap_argc; i++) + assert_string_equal(parse_args.ap_argv[i], expected_words[i]); + + ddb_str2argv_free(&parse_args); +} + +static void +assert_parsed_fail(const char *str) +{ + struct argv_parsed parse_args = {0}; + int rc; + + rc = ddb_str2argv_create(str, &parse_args); + ddb_str2argv_free(&parse_args); + assert_rc_equal(-DER_INVAL, rc); +} + +/* + * ----------------------------------------------- + * Test implementations + * ----------------------------------------------- + */ + +#define assert_invalid_f_path(path, parts) assert_invalid(vos_path_parse(path, &parts)) +#define assert_f_path(path, parts) assert_success(vos_path_parse(path, &parts)) + +static void +vos_file_parts_tests(void **state) +{ + struct vos_file_parts parts = {0}; + uuid_t expected_uuid; + + uuid_parse("12345678-1234-1234-1234-123456789012", expected_uuid); + + assert_invalid_f_path("", parts); + assert_invalid_f_path("/mnt/daos", parts); + assert_invalid_f_path("/mnt/daos/12345678-1234-1234-1234-123456789012", parts); + + assert_f_path("/mnt/daos/12345678-1234-1234-1234-123456789012/vos-1", parts); + + assert_string_equal("/mnt/daos", parts.vf_db_path); + assert_uuid_equal(expected_uuid, parts.vf_pool_uuid); + assert_string_equal("vos-1", parts.vf_vos_file); + assert_int_equal(1, parts.vf_target_idx); +} + +static void +string_to_argv_tests(void **state) +{ + assert_parsed_words2("one", 1, { "one" 
}); + assert_parsed_words2("one two", 2, {"one", "two"}); + assert_parsed_words2("one two three four five", 5, {"one", "two", "three", "four", "five"}); + assert_parsed_words2("one 'two two two'", 2, {"one", "two two two"}); + assert_parsed_words2("one 'two two two' three", 3, {"one", "two two two", "three"}); + assert_parsed_words2("one \"two two two\" three", 3, {"one", "two two two", "three"}); + + assert_parsed_fail("one>"); + assert_parsed_fail("one<"); + assert_parsed_fail("'one"); + assert_parsed_fail(" \"one"); + assert_parsed_fail("one \"two"); +} + +#define assert_invalid_program_args(argc, ...) \ + assert_rc_equal(-DER_INVAL, _assert_invalid_program_args(argc, ((char*[])__VA_ARGS__))) +static int +_assert_invalid_program_args(uint32_t argc, char **argv) +{ + struct program_args pa; + struct ddb_ctx ctx = { + .dc_io_ft.ddb_print_message = fake_print, + .dc_io_ft.ddb_print_error = fake_print + }; + + return ddb_parse_program_args(&ctx, argc, argv, &pa); +} + +#define assert_program_args(expected_program_args, argc, ...) 
\ + assert_success(_assert_program_args(&expected_program_args, argc, ((char*[])__VA_ARGS__))) +static int +_assert_program_args(struct program_args *expected_pa, uint32_t argc, char **argv) +{ + struct program_args pa = {0}; + int rc; + struct ddb_ctx ctx = { + .dc_io_ft.ddb_print_message = fake_print, + .dc_io_ft.ddb_print_error = fake_print + }; + + rc = ddb_parse_program_args(&ctx, argc, argv, &pa); + if (rc != 0) + return rc; + + + if (expected_pa->pa_r_cmd_run != NULL && pa.pa_r_cmd_run != NULL && + strcmp(expected_pa->pa_r_cmd_run, pa.pa_r_cmd_run) != 0) { + print_error("ERROR: %s != %s\n", expected_pa->pa_r_cmd_run, pa.pa_r_cmd_run); + return -DER_INVAL; + } + + if (expected_pa->pa_cmd_file != NULL && pa.pa_cmd_file != NULL && + strcmp(expected_pa->pa_cmd_file, pa.pa_cmd_file) != 0) { + print_error("ERROR: %s != %s\n", expected_pa->pa_cmd_file, pa.pa_cmd_file); + return -DER_INVAL; + } + + return 0; +} + +static void +parse_args_tests(void **state) +{ + struct program_args pa = {0}; + + assert_invalid_program_args(2, {"", "-z"}); + assert_invalid_program_args(3, {"", "command1", "command2"}); + pa.pa_r_cmd_run = "command"; + assert_program_args(pa, 3, {"", "-R", "command"}); + pa.pa_r_cmd_run = ""; + + pa.pa_cmd_file = "path"; + assert_program_args(pa, 3, {"", "-f", "path"}); +} + +#define assert_vtp_eq(a, b) \ +do { \ + assert_uuid_equal(a.vtp_path.vtp_cont, b.vtp_path.vtp_cont); \ + assert_int_equal(a.vtp_cont_idx, b.vtp_cont_idx); \ + assert_int_equal(a.vtp_oid_idx, b.vtp_oid_idx); \ + assert_int_equal(a.vtp_dkey_idx, b.vtp_dkey_idx); \ + assert_int_equal(a.vtp_akey_idx, b.vtp_akey_idx); \ + assert_int_equal(a.vtp_recx_idx, b.vtp_recx_idx); \ + assert_int_equal(a.vtp_path.vtp_oid.id_pub.hi, b.vtp_path.vtp_oid.id_pub.hi); \ + assert_int_equal(a.vtp_path.vtp_oid.id_pub.lo, b.vtp_path.vtp_oid.id_pub.lo); \ + assert_int_equal(a.vtp_path.vtp_dkey.iov_len, b.vtp_path.vtp_dkey.iov_len); \ + if (a.vtp_path.vtp_dkey.iov_len > 0) \ + 
assert_memory_equal(a.vtp_path.vtp_dkey.iov_buf, b.vtp_path.vtp_dkey.iov_buf, \ + a.vtp_path.vtp_dkey.iov_len); \ + assert_int_equal(a.vtp_path.vtp_akey.iov_len, b.vtp_path.vtp_akey.iov_len); \ + if (a.vtp_path.vtp_akey.iov_len > 0) \ + assert_memory_equal(a.vtp_path.vtp_akey.iov_buf, b.vtp_path.vtp_akey.iov_buf, \ + a.vtp_path.vtp_akey.iov_len); \ + } while (0) + +#define assert_invalid_path(path) \ +do { \ + struct dv_tree_path_builder __vt = {0}; \ + daos_handle_t poh = {0}; \ + assert_rc_equal(-DER_INVAL, ddb_vtp_init(poh, path, &__vt)); \ +} while (0) + +#define assert_invalid_parse_dtx_id(str) \ + do { \ + struct dtx_id __dtx_id = {0}; \ + assert_invalid(ddb_parse_dtx_id(str, &__dtx_id)); \ + } while (0) + +static void +parse_dtx_id_tests(void **state) +{ + struct dtx_id id; + uuid_t uuid; + + assert_invalid_parse_dtx_id(NULL); + assert_invalid_parse_dtx_id(""); + assert_invalid_parse_dtx_id("garbage.more_garbage"); + assert_invalid_parse_dtx_id("12345678-1234-1243-1243-124356789012.garbage"); + assert_invalid_parse_dtx_id("garbage.123456890"); + + assert_success(ddb_parse_dtx_id("12345678-1234-1243-1243-124356789012.123456890", &id)); + uuid_parse("12345678-1234-1243-1243-124356789012", uuid); + assert_uuid_equal(uuid, id.dti_uuid); + assert_int_equal(0x123456890, id.dti_hlc); +} + +#define assert_parsed_key(str, e) do {\ + daos_key_t __key = {0}; \ + assert_int_equal(strlen(str), ddb_parse_key(str, &__key)); \ + assert_key_equal(e, __key); \ + daos_iov_free(&__key); \ +} while (0) + +#define set_expected_str(key, str) do { \ + sprintf(key.iov_buf, str); \ + d_iov_set(&key, key.iov_buf, strlen(str)); \ +} while (0) + +#define set_expected_str_len(key, str, len) do { \ + memset(key.iov_buf, 0, len); \ + sprintf(key.iov_buf, str); \ + d_iov_set(&key, key.iov_buf, len); \ +} while (0) + +#define set_expected(key, val) d_iov_set(&key, &val, sizeof(val)) +#define set_expected_len(key, val, len) d_iov_set(&key, &val, len) + +static void 
+keys_are_parsed_correctly(void **state) +{ + daos_key_t key = {0}; + daos_key_t expected_key = {0}; + char buf[128] = {0}; + + d_iov_set(&expected_key, buf, ARRAY_SIZE(buf)); + + /* + * Invalid key path parts + */ + /* key should not be an empty string or NULL */ + assert_invalid(ddb_parse_key("", &key)); + assert_invalid(ddb_parse_key(NULL, &key)); + /* invalid syntax */ + assert_invalid(ddb_parse_key("{}", &key)); + assert_invalid(ddb_parse_key("{", &key)); + assert_invalid(ddb_parse_key("}", &key)); + assert_invalid(ddb_parse_key("string_key{{64}", &key)); + assert_invalid(ddb_parse_key("string_key{1{64}", &key)); + assert_invalid(ddb_parse_key("string_key{64}}", &key)); + assert_invalid(ddb_parse_key("string_key{64", &key)); + assert_invalid(ddb_parse_key("string_key}64", &key)); + /* must actually have a string value before size, or a type */ + assert_invalid(ddb_parse_key("{64}", &key)); + /* invalid size */ + assert_invalid(ddb_parse_key("string_key{a}", &key)); + /* shouldn't have anything after the size */ + assert_invalid(ddb_parse_key("string_key{5}more", &key)); + /* length is too small */ + assert_invalid(ddb_parse_key("string_key{0}", &key)); + assert_invalid(ddb_parse_key("string_key{3}", &key)); + /* invalid type */ + assert_invalid(ddb_parse_key("{uint:3}", &key)); + /* value is too big for type */ + + /* String keys ... 
some with length specified */ + /* Note that length of key does NOT include a NULL terminator */ + set_expected_str(expected_key, "string_key"); + assert_parsed_key("string_key", expected_key); + + set_expected_str_len(expected_key, "string_key", 64); + assert_parsed_key("string_key{64}", expected_key); + + /* able to escape curly brace */ + set_expected_str_len(expected_key, "string_{key", 64); + assert_parsed_key("string_\\{key{64}", expected_key); + + set_expected_str(expected_key, "string_{key"); + assert_parsed_key("string_\\{key", expected_key); + + set_expected_str_len(expected_key, "{{{{", 64); + assert_parsed_key("\\{\\{\\{\\{{64}", expected_key); + + set_expected_str(expected_key, "{{{{"); + assert_parsed_key("\\{\\{\\{\\{", expected_key); + + set_expected_str(expected_key, "}}}}"); + assert_parsed_key("\\}\\}\\}\\}", expected_key); + + set_expected_str(expected_key, "string_value{24}"); + assert_parsed_key("string_value\\{24\\}", expected_key); + + /* Number strings */ + uint8_t key_val_8 = 9; + + set_expected(expected_key, key_val_8); + assert_parsed_key("{uint8:9}", expected_key); + + uint16_t key_val_16 = 17; + + set_expected(expected_key, key_val_16); + assert_parsed_key("{uint16:17}", expected_key); + + uint32_t key_val_32 = 33; + + set_expected(expected_key, key_val_32); + assert_parsed_key("{uint32:33}", expected_key); + + uint64_t key_val_64 = 99999999; + + set_expected(expected_key, key_val_64); + assert_parsed_key("{uint64:99999999}", expected_key); + + uint64_t key_val_hex = 0x12345678; + + set_expected(expected_key, key_val_hex); + assert_parsed_key("{uint64:0x12345678}", expected_key); + + uint8_t bin_buf[10] = {0}; + + memset(bin_buf, 0xAB, ARRAY_SIZE(bin_buf)); + set_expected_len(expected_key, bin_buf, ARRAY_SIZE(bin_buf)); + assert_parsed_key("{bin:0xABABABABABABABABABAB}", expected_key); + assert_parsed_key("{bin(5):0xABABABABABABABABABAB}", expected_key); + + /* Currently don't check for value that's too big */ + 
assert_true(ddb_parse_key("{uint8:3000000000}", &key) > 0); + daos_iov_free(&key); +} + +/* + * ----------------------------------------------- + * Execute + * ----------------------------------------------- + */ +#define TEST(x) {#x, x, NULL, NULL} +int +ddb_parse_tests_run() +{ + static const struct CMUnitTest tests[] = { + TEST(vos_file_parts_tests), + TEST(string_to_argv_tests), + TEST(parse_args_tests), + TEST(parse_dtx_id_tests), + TEST(keys_are_parsed_correctly), + }; + return cmocka_run_group_tests_name("DDB helper parsing function tests", tests, + NULL, NULL); +} diff --git a/src/ddb/tests/ddb_path_tests.c b/src/ddb/tests/ddb_path_tests.c new file mode 100644 index 00000000000..7c0c7a3a421 --- /dev/null +++ b/src/ddb/tests/ddb_path_tests.c @@ -0,0 +1,387 @@ +/** + * (C) Copyright 2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include + +#include "ddb_cmocka.h" +#include "ddb_test_driver.h" +#include "ddb_parse.h" +#include + +/* These tests are to verify the parsing and printing of the vos path */ +static struct ddb_ctx g_ctx = {.dc_io_ft.ddb_print_message = dvt_fake_print}; + +/* + * This just verifies that the parsing succeeds. 
There are other tests that verify that parts
+ * are parsed correctly
+ */
+static void
+simple_path_parsing(void **state)
+{
+	struct dv_indexed_tree_path itp = {0};
+
+	assert_success(itp_parse(NULL, &itp));
+	assert_success(itp_parse("", &itp));
+	assert_success(itp_parse("/", &itp));
+	assert_success(itp_parse("/[0]", &itp));
+	assert_success(itp_parse("/[0]/", &itp));
+	assert_success(itp_parse("/[0]/[0]", &itp));
+	assert_success(itp_parse("/[0]/[0]/", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]/", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]/[0]", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]/[0]/", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]/[0]/[0]", &itp));
+	assert_success(itp_parse("/[0]/[0]/[0]/[0]/[0]/", &itp));
+
+	/* Too many parts: a sixth part is rejected */
+	assert_invalid(itp_parse("/[0]/[0]/[0]/[0]/[0]/[0]", &itp));
+}
+
+/* Test the safe string function, which escapes special characters of a key string in place */
+static void
+key_safe_str_tests(void **state)
+{
+	char buf[128] = {0};
+	char small_buf[8] = {0};
+#define assert_key_escaped(key_str, expected) do { \
+	sprintf(buf, key_str); \
+	assert_true(itp_key_safe_str(buf, ARRAY_SIZE(buf))); \
+	assert_string_equal(expected, buf);\
+} while (0)
+
+	itp_key_safe_str(buf, (sizeof(buf) / sizeof((buf)[0])));
+	/* buf starts out empty, so there is nothing to escape and it must stay empty */
+	assert_int_equal(0, strlen(buf));
+
+	/*
+	 * Escaping a forward slash only requires a single backslash ('\'). However, in the C string
+	 * the backslash actually has to be escaped as well, hence the double backslash.
+	 */
+	assert_key_escaped("a", "a");
+	assert_key_escaped("/", "\\/");
+	assert_key_escaped("a/", "a\\/");
+	assert_key_escaped("a/b/c/d/e/f", "a\\/b\\/c\\/d\\/e\\/f");
+	assert_key_escaped("{", "\\{");
+	assert_key_escaped("/{/}\\", "\\/\\{\\/\\}\\\\");
+
+	/* When buf is too small to hold the escape characters, false is returned and buf is unchanged */
+	sprintf(small_buf, "///////");
+	assert_false(itp_key_safe_str(small_buf, ARRAY_SIZE(small_buf)));
+	assert_string_equal(small_buf, "///////");
+}
+
+static void
+key_printing_and_parsing_tests(void **state)
+{
+/*
+ * These tests will parse the first argument, then print it. The printed
+ * value will be compared to the second (expected) argument.
+ */
+#define assert_key_parsed_printed(parsed, printed) do {\
+	union itp_part_type __v = {0}; \
+	assert_true(ddb_parse_key(parsed, &__v.itp_key) > 0); \
+	itp_print_part_key(&g_ctx, &__v); \
+	assert_printed_exact(printed); \
+	dvt_fake_print_reset(); \
+	daos_iov_free(&__v.itp_key); \
+} while (0)
+
+	assert_key_parsed_printed("akey", "akey");
+	assert_key_parsed_printed("akey{4}", "akey");
+	assert_key_parsed_printed("akey{64}", "akey{64}");
+	/* a binary key may be given with or without its size; the size is always printed */
+	assert_key_parsed_printed("{bin:0xabcdef1234}", "{bin(5):0xabcdef1234}");
+	assert_key_parsed_printed("{bin(5):0xabcdef1234}", "{bin(5):0xabcdef1234}");
+
+	/* Int types. Hex letters' case doesn't matter.
Will always print as lower case */
+	assert_key_parsed_printed("{uint64:0xABCDEF1234}", "{uint64:0xabcdef1234}");
+	assert_key_parsed_printed("{uint32:0x12345678}", "{uint32:0x12345678}");
+	assert_key_parsed_printed("{uint16:0x1234}", "{uint16:0x1234}");
+	assert_key_parsed_printed("{uint8:0xAF}", "{uint8:0xaf}");
+
+	/* Parsing doesn't yet reject values too large for the type, so they get truncated */
+	assert_key_parsed_printed("{uint8:0xFFFAAA}", "{uint8:0xaa}");
+	assert_key_parsed_printed("\\/", "\\/");
+}
+
+/* Test setting and printing the full path, one part at a time, given the path parts structure */
+static void
+fully_set_and_print_path_parts(void **state)
+{
+	struct dv_indexed_tree_path itp = {0};
+	uuid_t null_uuid = {0};
+
+	dvt_fake_print_reset();
+
+	/* Empty path */
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("/");
+	dvt_fake_print_reset();
+
+	/* shouldn't be able to set object before container */
+	assert_false(itp_set_obj(&itp, g_oids[0], 3));
+	/* Can't set a NULL container, nor an all-zero uuid */
+	assert_false(itp_set_cont(&itp, NULL, 1));
+	assert_false(itp_set_cont(&itp, null_uuid, 1));
+
+	/* Set container and print */
+	assert_true(itp_set_cont(&itp, g_uuids[0], 1));
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("CONT: (/[1]) /12345678-1234-1234-1234-123456789001");
+	dvt_fake_print_reset();
+
+	/* Set object and print */
+	assert_true(itp_set_obj(&itp, g_oids[0], 2));
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("OBJ: (/[1]/[2]) /12345678-1234-1234-1234-123456789001/"
+			     "281479271743488.4294967296.0.0");
+	dvt_fake_print_reset();
+
+	/* Set dkey and print */
+	assert_true(itp_set_dkey(&itp, &g_dkeys[0], 3));
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("DKEY: (/[1]/[2]/[3]) /12345678-1234-1234-1234-123456789001/"
+			     "281479271743488.4294967296.0.0/"
+			     "dkey-1");
+	dvt_fake_print_reset();
+
+	/* set akey and print */
+	assert_true(itp_set_akey(&itp, &g_akeys[0], 4));
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("AKEY: (/[1]/[2]/[3]/[4]) /12345678-1234-1234-1234-123456789001/"
+			     "281479271743488.4294967296.0.0/"
+			     "dkey-1/akey-1");
+	dvt_fake_print_reset();
+
+	/* set recx and print */
+	assert_true(itp_set_recx(&itp, &g_recxs[0], 5));
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact("RECX: (/[1]/[2]/[3]/[4]/[5]) /12345678-1234-1234-1234-123456789001/"
+			     "281479271743488.4294967296.0.0/"
+			     "dkey-1/akey-1/{9-18}");
+	dvt_fake_print_reset();
+
+	itp_free(&itp);
+}
+
+/* An index without its value shouldn't actually happen in production, but test just in case */
+static void
+path_parts_partial_behavior(void **state)
+{
+	struct dv_indexed_tree_path itp = {0};
+
+	itp_set_cont_idx(&itp, 1);
+	/* missing container uuid: the path must print as invalid */
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_exact(INVALID_PATH);
+	dvt_fake_print_reset();
+
+	itp_set_cont_part_value(&itp, g_uuids[0]);
+	itp_print_full(&g_ctx, &itp);
+	assert_printed_not_equal(INVALID_PATH);
+	dvt_fake_print_reset();
+}
+
+/*
+ * These tests take a path structure and uses the ddb path printer functions to print the path
+ * to a test buffer. Then it parses that buffer to a new path structure and compares to make sure
+ * that the path printing and parsing is consistent.
+ */
+static void
+parse_path_from_printed_path(void **state)
+{
+	struct dv_indexed_tree_path itp = {0};
+	struct dv_indexed_tree_path itp_out = {0};
+
+	/* An empty print buffer parses successfully */
+	dvt_fake_print_reset();
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	itp_free(&itp_out);
+
+	/* Container: round-trip the printed indexes first, then the printed part values */
+	itp_set_cont(&itp, g_uuids[0], 10);
+
+	itp_print_indexes(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_int_equal(itp.itp_parts[PATH_PART_CONT].itp_part_idx,
+			 itp_out.itp_parts[PATH_PART_CONT].itp_part_idx);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	itp_print_parts(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_uuid_equal(itp.itp_parts[PATH_PART_CONT].itp_part_value.itp_uuid,
+			  itp_out.itp_parts[PATH_PART_CONT].itp_part_value.itp_uuid);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	/* object */
+	itp_set_obj(&itp, g_oids[0], 1);
+	itp_print_indexes(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_int_equal(itp.itp_parts[PATH_PART_OBJ].itp_part_idx,
+			 itp_out.itp_parts[PATH_PART_OBJ].itp_part_idx);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	itp_print_parts(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+
+	assert_uoid_equal(itp.itp_parts[PATH_PART_OBJ].itp_part_value.itp_oid,
+			  itp_out.itp_parts[PATH_PART_OBJ].itp_part_value.itp_oid);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	/* dkey */
+	itp_set_dkey(&itp, &g_dkeys[0], 2);
+	itp_print_indexes(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_int_equal(itp.itp_parts[PATH_PART_DKEY].itp_part_idx,
+			 itp_out.itp_parts[PATH_PART_DKEY].itp_part_idx);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	itp_print_parts(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+
+	assert_key_equal(itp.itp_parts[PATH_PART_DKEY].itp_part_value.itp_key,
+			 itp_out.itp_parts[PATH_PART_DKEY].itp_part_value.itp_key);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	/* akey */
+	itp_set_akey(&itp, &g_akeys[0], 2);
+	itp_print_indexes(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_int_equal(itp.itp_parts[PATH_PART_AKEY].itp_part_idx,
+			 itp_out.itp_parts[PATH_PART_AKEY].itp_part_idx);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	itp_print_parts(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_key_equal(itp.itp_parts[PATH_PART_AKEY].itp_part_value.itp_key,
+			 itp_out.itp_parts[PATH_PART_AKEY].itp_part_value.itp_key);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	/* recx */
+	itp_set_recx(&itp, &g_recxs[0], 2);
+	itp_print_indexes(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+	assert_int_equal(itp.itp_parts[PATH_PART_RECX].itp_part_idx,
+			 itp_out.itp_parts[PATH_PART_RECX].itp_part_idx);
+	dvt_fake_print_reset();
+	itp_free(&itp_out);
+
+	itp_print_parts(&g_ctx, &itp);
+	assert_success(itp_parse(dvt_fake_print_buffer, &itp_out));
+
+	assert_recx_equal(itp.itp_parts[PATH_PART_RECX].itp_part_value.itp_recx,
+			  itp_out.itp_parts[PATH_PART_RECX].itp_part_value.itp_recx);
+	dvt_fake_print_reset();
+
+	itp_free(&itp);
+	itp_free(&itp_out);
+}
+
+#define assert_invalid_path(path, err_code) \
+do { \
+	struct dv_indexed_tree_path __itp = {0}; \
+	assert_rc_equal(-err_code, itp_parse(path, &__itp)); \
+	itp_free(&__itp); \
+} while (0)
+
+#define assert_path_parsed_equals(path, parsed_path) \
+do { \
+	struct dv_indexed_tree_path __itp = {0}; \
+	itp_parse(path, &__itp); \
+	itp_print_parts(&g_ctx, &__itp); \
+	assert_printed_exact(parsed_path); \
+	dvt_fake_print_reset(); \
+	itp_free(&__itp);\
+} while (0)
+
+/*
+ * These tests take a string path, parse it, then print the parsed path and compare the output
+ * to the original. This verifies that the printing and parsing is consistent.
+ */
+static void
+string_to_path_to_string(void **state)
+{
+	dvt_fake_print_reset();
+	assert_path_parsed_equals("", "/");
+	assert_path_parsed_equals("/12345678-1234-1234-1234-123456789012/",
+				  "/12345678-1234-1234-1234-123456789012");
+
+	assert_path_parsed_equals("/12345678-1234-1234-1234-123456789012/1.2.3.4/",
+				  "/12345678-1234-1234-1234-123456789012/1.2.3.4");
+
+	assert_path_parsed_equals("/12345678-1234-1234-1234-123456789012/1.2.3.4/key/",
+				  "/12345678-1234-1234-1234-123456789012/1.2.3.4/key");
+
+	assert_path_parsed_equals("/12345678-1234-1234-1234-123456789012/1.2.3.4/key{64}/",
+				  "/12345678-1234-1234-1234-123456789012/1.2.3.4/key{64}");
+
+	assert_path_parsed_equals("/12345678-1234-1234-1234-123456789012/1.2.3.4/\\/",
+				  "/12345678-1234-1234-1234-123456789012/1.2.3.4/\\/");
+}
+
+/* Verify that a malformed path returns the error code specific to the part that fails to parse */
+static void
+invalid_paths_return_error(void **state)
+{
+	assert_invalid_path("12345678", DDBER_INVALID_CONT);
+	assert_invalid_path("/12345678-1234-1234-1234-12345678900",
+			    DDBER_INVALID_CONT);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0",
+			    DDBER_INVALID_OBJ);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.",
+			    DDBER_INVALID_OBJ);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0./dkey",
+			    DDBER_INVALID_OBJ);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0../",
+			    DDBER_INVALID_OBJ);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0.0.0/",
+			    DDBER_INVALID_OBJ);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/invalid",
+			    DDBER_INVALID_RECX);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/{-1}",
+			    DDBER_INVALID_RECX);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/(0-1)",
+			    DDBER_INVALID_RECX);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/"
+			    "{0-1-2}",
+			    DDBER_INVALID_RECX);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/"
+			    "{0 1}",
+			    DDBER_INVALID_RECX);
+	assert_invalid_path("/12345678-1234-1234-1234-123456789012/4321.1234.0.0/dkey/akey/"
+			    "{0->1}",
+			    DDBER_INVALID_RECX);
+}
+
+/*
+ * -----------------------------------------------
+ * Execute: run the path tests listed below as one cmocka group
+ * -----------------------------------------------
+ */
+#define TEST(x) {#x, x, NULL, NULL}
+int
+ddb_path_tests_run()
+{
+	static const struct CMUnitTest tests[] = {
+		TEST(simple_path_parsing),
+		TEST(key_safe_str_tests),
+		TEST(key_printing_and_parsing_tests),
+		TEST(fully_set_and_print_path_parts),
+		TEST(path_parts_partial_behavior),
+		TEST(parse_path_from_printed_path),
+		TEST(string_to_path_to_string),
+		TEST(invalid_paths_return_error),
+	};
+	return cmocka_run_group_tests_name("DDB Path Parsing Tests", tests, NULL, NULL);
+}
diff --git a/src/ddb/tests/ddb_smoke_tests.sh b/src/ddb/tests/ddb_smoke_tests.sh
new file mode 100755
index 00000000000..99f5cb06757
--- /dev/null
+++ b/src/ddb/tests/ddb_smoke_tests.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# -----------------------------------------------------------------------------
+# Utility functions
+# -----------------------------------------------------------------------------
+function title() {
+    LINE="======================================================================="
+    echo -e "\e[1m\e[94m${LINE}"
+    echo "$1"
+    echo -e "${LINE}\e[0m"
+}
+
+function p() {
+    if [ "$v_pause" == "1" ]; then
+        read -r var
+        if [ "$var" == "c" ]; then
+            v_pause=0
+        fi
+    fi
+}
+
+function pause() {
+    echo -e "\e[1m\e[94m==> $*\e[0m"
+    p
+}
+function msg() {
+    echo -e "\e[1m\e[94m*** $* ***\e[0m"
+}
+
+function echo_cmd() {
+    echo -e "\e[1m\e[32m"
+    echo -e "\$ " "$@"
+    echo -e "\e[0m"
+    p
+}
+
+function run_cmd() {
+    echo_cmd "$@"
+    "$@"
+    echo ""
+    p
+}
+
+# create a vos file to connect to
+run_cmd
ddb_tests -c
+vos_file=/mnt/daos/12345678-1234-1234-1234-123456789012/ddb_vos_test # file written by 'ddb_tests -c'
+
+msg "'ls' commands"
+run_cmd ddb $vos_file -R 'ls'
+run_cmd ddb $vos_file -R 'ls 12345678-1234-1234-1234-123456789001'
+run_cmd ddb $vos_file -R 'ls [0]'
+run_cmd ddb $vos_file -R 'ls [0]/[1]'
+run_cmd ddb $vos_file -R 'ls [0]/[1] -r'
+
+msg "'dump' and 'load' commands"
+vos_path="[0]/[0]/[0]/[2]" # index path with four parts: container/object/dkey/akey
+echo 'echo "A New Value" > /tmp/ddb_new_value'
+echo "A New Value" > /tmp/ddb_new_value
+
+run_cmd ddb $vos_file -R "ls -r $vos_path"
+run_cmd ddb $vos_file -R "dump_value $vos_path /tmp/ddb_value_dump"
+run_cmd cat /tmp/ddb_value_dump
+run_cmd ddb $vos_file -R "dump_value [0]/[0]/[0]/[1]/[0] /tmp/ddb_value_dump"
+run_cmd cat /tmp/ddb_value_dump
+
+run_cmd ddb $vos_file -R "load /tmp/ddb_new_value $vos_path"
+run_cmd ddb $vos_file -R "dump_value $vos_path /tmp/ddb_value_dump"
+run_cmd cat /tmp/ddb_value_dump
+run_cmd diff /tmp/ddb_new_value /tmp/ddb_value_dump
+
+rm -f /tmp/ddb_commands
+touch /tmp/ddb_commands
+cat << EOF > /tmp/ddb_commands
+ls
+ls [0]
+ls [0]/[0]
+dump_superblock
+EOF
+run_cmd ddb $vos_file -f /tmp/ddb_commands
diff --git a/src/ddb/tests/ddb_test_driver.c b/src/ddb/tests/ddb_test_driver.c
new file mode 100644
index 00000000000..e23ecba0387
--- /dev/null
+++ b/src/ddb/tests/ddb_test_driver.c
@@ -0,0 +1,662 @@
+/**
+ * (C) Copyright 2022 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ddb_cmocka.h"
+#include "ddb_test_driver.h"
+
+#define DEFINE_IOV(str) {.iov_buf = str, .iov_buf_len = strlen(str), .iov_len = strlen(str)}
+#ifndef DDB_FORCE_VALGRIND
+#define DDB_FORCE_VALGRIND 0
+#endif
+
+bool g_verbose; /* Can be set to true while developing/debugging tests; echoes captured ddb output to stdout */
+
+const char *g_uuids_str[] = {
+	"12345678-1234-1234-1234-123456789001",
+	"12345678-1234-1234-1234-123456789002",
+	"12345678-1234-1234-1234-123456789003",
+	"12345678-1234-1234-1234-123456789004",
+	"12345678-1234-1234-1234-123456789005",
+	"12345678-1234-1234-1234-123456789006",
+	"12345678-1234-1234-1234-123456789007",
+	"12345678-1234-1234-1234-123456789008",
+	"12345678-1234-1234-1234-123456789009",
+	"12345678-1234-1234-1234-123456789010",
+};
+
+const char *g_invalid_uuid_str = "99999999-9999-9999-9999-999999999999";
+daos_unit_oid_t g_invalid_oid = {.id_pub = {.lo = 99999, .hi = 9999} };
+
+char *g_dkeys_str[] = {
+	"dkey-1",
+	"dkey-2",
+	"dkey-3",
+	"dkey-4",
+	"dkey-5",
+	"dkey-6",
+	"dkey-7",
+	"dkey-8",
+	"dkey-9",
+	"dkey-10",
+};
+
+char *g_akeys_str[] = {
+	"akey-1",
+	"akey-2",
+	"akey-3",
+	"akey-4",
+	"akey-5",
+	"akey-6",
+	"akey-7",
+	"akey-8",
+	"akey-9",
+	"akey-10",
+};
+
+char *g_invalid_key_str = "invalid key";
+
+daos_unit_oid_t g_oids[10]; /* this and the arrays below are filled in by setup_global_arrays() */
+uuid_t g_uuids[10];
+daos_key_t g_dkeys[10];
+daos_key_t g_akeys[10];
+daos_recx_t g_recxs[10];
+daos_key_t g_invalid_key;
+daos_recx_t g_invalid_recx = {.rx_nr = 9999, .rx_idx = 9999};
+
+
+daos_unit_oid_t
+dvt_gen_uoid(uint32_t i) /* build a shard-0 unit object id whose lo/hi words are offset by i */
+{
+	daos_unit_oid_t uoid = {0};
+	daos_obj_id_t oid;
+
+	oid.lo = (1L << 32) + i;
+	oid.hi = (1 << 16) + i;
+	daos_obj_set_oid(&oid, DAOS_OT_MULTI_HASHED, OR_RP_1, 1, 0);
+
+	uoid.id_shard = 0;
+	uoid.id_pub = oid;
+
+	return uoid;
+}
+
+void
+dvt_vos_insert_recx(daos_handle_t coh, daos_unit_oid_t uoid, char *dkey_str, char *akey_str,
+		    daos_recx_t *recx, daos_epoch_t epoch) /* write a fixed array value at recx; asserts on failure */
+{
+	daos_key_t dkey = DEFINE_IOV(dkey_str);
+
+	d_iov_t iov = DEFINE_IOV("This is a recx value");
+	d_sg_list_t sgl = {.sg_iovs = &iov, .sg_nr = 1, .sg_nr_out = 1};
+
+	daos_iod_t iod = {
+		.iod_name = DEFINE_IOV(akey_str),
+		.iod_type = DAOS_IOD_ARRAY,
+		.iod_nr = 1,
+		.iod_size = 1,
+		.iod_recxs = recx
+	};
+
+	assert_success(vos_obj_update(coh, uoid, epoch, 0, 0, &dkey, 1, &iod, NULL, &sgl));
+}
+
+void
+dvt_vos_insert_single(daos_handle_t coh, daos_unit_oid_t uoid, char *dkey_str, char *akey_str,
+		      char *data_str, daos_epoch_t epoch) /* write data_str as a single value; asserts on failure */
+{
+	daos_key_t dkey = DEFINE_IOV(dkey_str);
+
+	d_iov_t iov = DEFINE_IOV(data_str);
+	d_sg_list_t sgl = {.sg_iovs = &iov, .sg_nr = 1, .sg_nr_out = 1};
+
+	daos_iod_t iod = {
+		.iod_name = DEFINE_IOV(akey_str),
+		.iod_type = DAOS_IOD_SINGLE,
+		.iod_nr = 1,
+		.iod_size = strlen(data_str)
+	};
+
+	assert_success(vos_obj_update(coh, uoid, epoch, 0, 0, &dkey, 1, &iod, NULL, &sgl));
+}
+
+/*
+ * Fake print state: ddb output is captured in dvt_fake_print_buffer so tests can verify how the ddb types are printed.
+ */
+
+uint32_t dvt_fake_print_called;
+bool dvt_fake_print_just_count;
+char dvt_fake_print_buffer[DVT_FAKE_PRINT_BUFFER_SIZE];
+
+int
+dvt_fake_print(const char *fmt, ...)
+{
+	va_list args;
+	uint32_t buffer_offset;
+	uint32_t buffer_left;
+
+	dvt_fake_print_called++;
+	if (dvt_fake_print_just_count) /* count-only mode: record the call, capture nothing */
+		return 0;
+
+	buffer_offset = strlen(dvt_fake_print_buffer); /* append after whatever was captured so far */
+	buffer_left = ARRAY_SIZE(dvt_fake_print_buffer) - buffer_offset;
+	va_start(args, fmt);
+	vsnprintf(dvt_fake_print_buffer + buffer_offset, buffer_left, fmt, args);
+	va_end(args);
+	if (g_verbose)
+		printf("%s", dvt_fake_print_buffer + buffer_offset);
+
+	return 0;
+}
+
+void dvt_fake_print_reset(void)
+{
+	memset(dvt_fake_print_buffer, 0, ARRAY_SIZE(dvt_fake_print_buffer));
+}
+
+size_t dvt_fake_get_file_size_result;
+
+size_t
+dvt_fake_get_file_size(const char *path)
+{
+	return dvt_fake_get_file_size_result;
+}
+
+bool dvt_fake_get_file_exists_result;
+
+bool
+dvt_fake_get_file_exists(const char *path)
+{
+	return dvt_fake_get_file_exists_result;
+}
+
+uint32_t dvt_fake_read_file_called;
+size_t dvt_fake_read_file_result;
+char dvt_fake_read_file_buf[64];
+
+size_t
+dvt_fake_read_file(const char *src_path, d_iov_t *contents)
+{
+	size_t to_copy = min(contents->iov_buf_len, ARRAY_SIZE(dvt_fake_read_file_buf));
+
+	dvt_fake_read_file_called++;
+	memcpy(contents->iov_buf, dvt_fake_read_file_buf, to_copy);
+	contents->iov_len = to_copy;
+
+	return dvt_fake_read_file_result; /* the preset result, independent of to_copy */
+}
+
+/*
+ * -----------------------------------------------
+ * Test infrastructure
+ * -----------------------------------------------
+ */
+
+int
+ddb_test_pool_setup(struct dt_vos_pool_ctx *tctx)
+{
+	int rc;
+	uint64_t size = (1ULL << 30);
+	struct stat st = {0};
+	char *pool_uuid = "12345678-1234-1234-1234-123456789012";
+
+	if (strlen(tctx->dvt_pmem_file) == 0) { /* no file chosen by the caller: default to /mnt/daos/<pool_uuid>/ddb_vos_test */
+		char dir[64] = {0};
+
+		sprintf(dir, "/mnt/daos/%s", pool_uuid);
+		if (stat(dir, &st) == -1) {
+			if (!SUCCESS(mkdir(dir, 0700))) {
+				rc = daos_errno2der(errno);
+				return rc;
+			}
+		}
+		snprintf(tctx->dvt_pmem_file, ARRAY_SIZE(tctx->dvt_pmem_file),
+			 "%s/ddb_vos_test", dir);
+	}
+	if (uuid_is_null(tctx->dvt_pool_uuid))
+		uuid_parse(pool_uuid, tctx->dvt_pool_uuid);
+
+	D_ASSERT(!daos_file_is_dax(tctx->dvt_pmem_file));
+	rc = open(tctx->dvt_pmem_file, O_CREAT | O_TRUNC | O_RDWR, 0666);
+	if (rc < 0) {
+		rc = daos_errno2der(errno);
+		return rc;
+	}
+
+	tctx->dvt_fd = rc; /* on success the fd stays open; callers close tctx->dvt_fd */
+	rc = fallocate(tctx->dvt_fd, 0, 0, size);
+	if (rc) {
+		rc = daos_errno2der(errno);
+		close(tctx->dvt_fd);
+		return rc;
+	}
+
+	rc = vos_pool_create(tctx->dvt_pmem_file, tctx->dvt_pool_uuid, 0, 0, 0, NULL);
+	if (rc) {
+		close(tctx->dvt_fd);
+		return rc;
+	}
+
+	return rc;
+}
+
+static int
+setup_global_arrays()
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(g_oids); i++)
+		g_oids[i] = dvt_gen_uoid(i);
+
+	for (i = 0; i < ARRAY_SIZE(g_uuids_str); i++)
+		uuid_parse(g_uuids_str[i], g_uuids[i]);
+
+	for (i = 0; i < ARRAY_SIZE(g_dkeys); i++)
+		d_iov_set(&g_dkeys[i], g_dkeys_str[i], strlen(g_dkeys_str[i]));
+
+	for (i = 0; i < ARRAY_SIZE(g_akeys); i++)
+		d_iov_set(&g_akeys[i], g_akeys_str[i], strlen(g_akeys_str[i]));
+
+	d_iov_set(&g_invalid_key, g_invalid_key_str, strlen(g_invalid_key_str));
+
+	for (i = 0; i < ARRAY_SIZE(g_recxs); i++) { /* NOTE(review): only g_recxs[0] is written, ending as {rx_idx = 9, rx_nr = 10}; the other entries stay zero */
+		g_recxs[0].rx_idx = i; /* NOTE(review): index [i] looks intended, but ddb_path_tests.c expects g_recxs[0] to print as {9-18} -- change both together */
+		g_recxs[0].rx_nr = 10;
+	}
+
+	return 0;
+}
+
+int
+ddb_test_setup_vos(void **state)
+{
+	struct dt_vos_pool_ctx *tctx = NULL;
+	daos_handle_t poh;
+
+	D_ASSERT(state);
+	D_ALLOC_PTR(tctx);
+	assert_non_null(tctx);
+	vos_self_init("/mnt/daos", false, 0);
+
+	assert_success(ddb_test_pool_setup(tctx));
+
+	assert_success(vos_pool_open(tctx->dvt_pmem_file, tctx->dvt_pool_uuid, 0, &poh));
+
+	if (DAOS_ON_VALGRIND || DDB_FORCE_VALGRIND)
+		/* smaller test data for valgrind: 8 containers, 4 objects, 4 dkeys, 4 akeys */
+		dvt_insert_data(poh, 8, 4, 4, 4, tctx);
+	else
+		/* default test data: a count of 0 selects the full global array size */
+		dvt_insert_data(poh, 0, 0, 0, 0, tctx);
+
+	vos_pool_close(poh);
+	vos_self_fini();
+
+	*state = tctx; /* handed to the tests; released in ddb_teardown_vos() */
+
+	return 0;
+}
+
+int
+ddb_teardown_vos(void **state)
+{
+	struct dt_vos_pool_ctx *tctx = *state;
+
+	vos_self_init("/mnt/daos", false, 0);
+	assert_success(vos_pool_destroy(tctx->dvt_pmem_file, tctx->dvt_pool_uuid));
+	vos_self_fini();
+	close(tctx->dvt_fd);
+	D_FREE(tctx);
+
+	return 0;
+}
+
+void
+dvt_iov_alloc(d_iov_t *iov, size_t len)
+{
+	D_ALLOC(iov->iov_buf, len); /* NOTE(review): the allocation result is not checked here */
+	iov->iov_buf_len = iov->iov_len = len;
+}
+
+void
+dvt_iov_alloc_str(d_iov_t *iov, const char *str)
+{
+	dvt_iov_alloc(iov, strlen(str) + 1); /* + 1 keeps room for the terminating NUL */
+	strcpy(iov->iov_buf, str);
+}
+
+static void
+create_object_data(daos_handle_t *coh, uint32_t obj_to_create, uint32_t dkeys_to_create,
+		   uint32_t akeys_to_create, uint32_t recx_to_create)
+{
+	int o, d, a, r; /* loop indexes: object, dkey, akey, recx */
+
+	for (o = 0; o < obj_to_create; o++) {
+		for (d = 0; d < dkeys_to_create; d++) {
+			for (a = 0; a < akeys_to_create; a++) {
+				if (a % 2 == 0) { /* even akey indexes get array values, odd ones a single value */
+					for (r = 0; r < recx_to_create; r++)
+						dvt_vos_insert_recx((*coh), g_oids[o],
+								    g_dkeys_str[d],
+								    g_akeys_str[a],
+								    &g_recxs[r], 1);
+				} else {
+					dvt_vos_insert_single((*coh), g_oids[o],
+							      g_dkeys_str[d],
+							      g_akeys_str[a],
+							      "This is a single value", 1);
+				}
+			}
+		}
+	}
+}
+
+void
+dvt_insert_data(daos_handle_t poh, uint32_t conts, uint32_t objs, uint32_t dkeys, uint32_t akeys,
+		struct dt_vos_pool_ctx *tctx)
+{
+	daos_handle_t coh;
+	uint32_t cont_to_create = ARRAY_SIZE(g_uuids);
+	uint32_t obj_to_create = ARRAY_SIZE(g_oids);
+	uint32_t dkeys_to_create = ARRAY_SIZE(g_dkeys);
+	uint32_t akeys_to_create = ARRAY_SIZE(g_akeys);
+	uint32_t recx_to_create = ARRAY_SIZE(g_recxs);
+	int c;
+
+	if (conts > 0) /* a count of 0 keeps the default, i.e. the size of the matching global array */
+		cont_to_create = conts;
+	if (objs > 0)
+		obj_to_create = objs;
+	if (dkeys > 0)
+		dkeys_to_create = dkeys;
+	if (akeys > 0)
+		akeys_to_create = akeys;
+
+	tctx->dvt_cont_count = cont_to_create; /* record what was created in the test context */
+	tctx->dvt_obj_count = obj_to_create;
+	tctx->dvt_dkey_count = dkeys_to_create;
+	tctx->dvt_akey_count = akeys_to_create;
+
+	/* Create each container and fill it with the same object/dkey/akey data */
+	for (c = 0; c < cont_to_create; c++) {
+		assert_success(vos_cont_create(poh, g_uuids[c]));
+		assert_success(vos_cont_open(poh, g_uuids[c], &coh));
+
+		create_object_data(&coh, obj_to_create, dkeys_to_create, akeys_to_create,
+				   recx_to_create);
+		vos_cont_close(coh);
+	}
+}
+
+static void
+dvt_dtx_begin_helper(daos_handle_t coh, const daos_unit_oid_t *oid, daos_epoch_t epoch,
+		     uint64_t dkey_hash, struct dtx_handle **dthp)
+{
+	struct dtx_handle *dth;
+	struct dtx_memberships *mbs;
+	size_t size;
+
+	D_ALLOC_PTR(dth);
+	assert_non_null(dth);
+
+	memset(dth, 0, sizeof(*dth));
+
+	size = sizeof(struct dtx_memberships) + sizeof(struct dtx_daos_target); /* header plus one target entry */
+
+	D_ALLOC(mbs, size);
+	assert_non_null(mbs);
+
+	mbs->dm_tgt_cnt = 1;
+	mbs->dm_grp_cnt = 1;
+	mbs->dm_data_size = sizeof(struct dtx_daos_target);
+	mbs->dm_tgts[0].ddt_id = 1;
+
+	/** Use unique API so new UUID is generated even on same thread */
+	daos_dti_gen_unique(&(&dth->dth_dte)->dte_xid);
+	dth->dth_dte.dte_ver = 1;
+	dth->dth_dte.dte_refs = 1;
+	dth->dth_dte.dte_mbs = mbs;
+
+	dth->dth_coh = coh;
+	dth->dth_epoch = epoch;
+	dth->dth_leader_oid = *oid;
+
+	dth->dth_flags = DTE_LEADER;
+	dth->dth_modification_cnt = 1;
+
+	dth->dth_op_seq = 1;
+	dth->dth_dkey_hash = dkey_hash;
+
+	D_INIT_LIST_HEAD(&dth->dth_share_cmt_list);
+	D_INIT_LIST_HEAD(&dth->dth_share_abt_list);
+	D_INIT_LIST_HEAD(&dth->dth_share_act_list);
+	D_INIT_LIST_HEAD(&dth->dth_share_tbd_list);
+	dth->dth_shares_inited = 1;
+
+	vos_dtx_rsrvd_init(dth);
+	vos_dtx_attach(dth, false, false);
+
+	*dthp = dth; /* the caller releases it with dvt_dtx_end() */
+}
+
+static void
+dvt_dtx_end(struct dtx_handle *dth)
+{
+	vos_dtx_detach(dth);
+	D_FREE(dth->dth_dte.dte_mbs);
+	D_FREE(dth);
+}
+
+/* After called, there should be two dtx records.
1 in committed table and 1 in active table */ +void +dvt_vos_insert_2_records_with_dtx(daos_handle_t coh) +{ + dvt_vos_insert_dtx_records(coh, 2, 1); +} + +void +dvt_vos_insert_dtx_records(daos_handle_t coh, uint32_t nr, uint32_t committed_nr) +{ + struct dtx_handle **dth; + const uint32_t recxs_nr = 1; + const uint32_t rec_size = 1; + daos_recx_t recxs[recxs_nr]; + daos_iod_t iod = {0}; + d_sg_list_t sgl = {0}; + daos_epoch_t epoch = 1; + uint64_t dkey_hash = 0x123; + int i; + + assert_true(committed_nr <= nr); + assert_true(nr <= ARRAY_SIZE(g_oids) && nr <= ARRAY_SIZE(g_dkeys)); + + D_ALLOC_ARRAY(dth, nr); + assert_non_null(dth); + memset(dth, 0, sizeof(*dth) * nr); + + /* use the same data for each update */ + d_sgl_init(&sgl, 1); + + recxs[0].rx_idx = 0; + recxs[0].rx_nr = daos_sgl_buf_size(&sgl); + + iod.iod_recxs = recxs; + iod.iod_nr = recxs_nr; + iod.iod_size = rec_size; + iod.iod_type = DAOS_IOD_ARRAY; + dvt_iov_alloc_str(&iod.iod_name, "akey"); + + for (i = 0; i < nr; i++) { + dvt_dtx_begin_helper(coh, &g_oids[i], epoch++, dkey_hash++, &dth[i]); + assert_success(vos_obj_update_ex(coh, g_oids[i], epoch, 0, 0, &g_dkeys[i], 1, &iod, + NULL, &sgl, dth[i])); + } + + /* commit */ + for (i = 0; i < committed_nr; i++) + assert_int_equal(1, vos_dtx_commit(coh, &dth[i]->dth_xid, 1, NULL)); + + /* end each dtx */ + for (i = 0; i < nr; i++) + dvt_dtx_end(dth[i]); + + /* clean up */ + daos_iov_free(&iod.iod_name); + d_sgl_fini(&sgl, false); + D_FREE(dth); +} + +struct ddb_test_driver_arguments { + bool dtda_create_vos_file; +}; + +static int +ddb_test_driver_arguments_parse(uint32_t argc, char **argv, struct ddb_test_driver_arguments *args) +{ + struct option program_options[] = { + { "create_vos", optional_argument, NULL, 'c' }, + { NULL } + }; + int index = 0, opt; + + memset(args, 0, sizeof(*args)); + + optind = 1; + opterr = 0; + while ((opt = getopt_long(argc, argv, "c", program_options, &index)) != -1) { + switch (opt) { + case 'c': + args->dtda_create_vos_file 
= true; + break; + case '?': + printf("'%c' is unknown\n", optopt); + return -DER_INVAL; + default: + return -DER_INVAL; + } + } + + return 0; +} + +static int +create_test_vos_file() +{ + struct dt_vos_pool_ctx tctx = {0}; + daos_handle_t poh; + daos_handle_t coh; + int conts = 2; + int objs = 5; + int dkeys = 5; + int akeys = 5; + int rc; + + rc = vos_self_init("/mnt/daos", false, 0); + if (rc != 0) { + fprintf(stderr, "Unable to initialize VOS: "DF_RC"\n", DP_RC(rc)); + ddb_fini(); + return -rc; + } + + rc = ddb_test_pool_setup(&tctx); + if (!SUCCESS(rc)) { + print_error("Unable to setup pool: "DF_RC"\n", DP_RC(rc)); + return rc; + } + assert_success(vos_pool_open(tctx.dvt_pmem_file, tctx.dvt_pool_uuid, 0, &poh)); + dvt_insert_data(poh, conts, objs, dkeys, akeys, &tctx); + + assert_success(vos_cont_open(poh, g_uuids[0], &coh)); + dvt_vos_insert_2_records_with_dtx(coh); + vos_cont_close(coh); + + vos_pool_close(poh); + + close(tctx.dvt_fd); + vos_self_fini(); + + print_message("VOS file created at: %s\n", tctx.dvt_pmem_file); + print_message("\t- pool uuid: "DF_UUIDF"\n", DP_UUID(tctx.dvt_pool_uuid)); + print_message("\t- containers: %d\n", conts); + print_message("\t- objs: %d\n", objs); + print_message("\t- dkeys: %d\n", dkeys); + print_message("\t- akeys: %d\n", akeys); + + return 0; +} + +static bool +char_in_tests(char a, char *str, uint32_t str_len) +{ + int i; + + if (strlen(str) == 0) /* if there is no filter, always return true */ + return true; + for (i = 0; i < str_len; i++) { + if (a == str[i]) + return true; + } + + return false; +} + +/* + * ----------------------------------------------- + * Execute + * ----------------------------------------------- + */ +int main(int argc, char *argv[]) +{ + struct ddb_test_driver_arguments args = {0}; + int rc; + + rc = ddb_init(); + if (rc != 0) + return -rc; + + ddb_test_driver_arguments_parse(argc, argv, &args); + + setup_global_arrays(); + + if (args.dtda_create_vos_file) { + rc = create_test_vos_file(); + 
goto done; + } + +#define RUN_TEST_SUIT(c, func)\ + do {if (char_in_tests(c, test_suites, ARRAY_SIZE(test_suites))) \ + rc += func(); } while (0) + + /* filtering suites and tests */ + char test_suites[] = ""; +#if CMOCKA_FILTER_SUPPORTED == 1 /** requires cmocka 1.1.5 */ + cmocka_set_test_filter("**"); +#endif + RUN_TEST_SUIT('a', ddb_parse_tests_run); + RUN_TEST_SUIT('b', ddb_cmd_options_tests_run); + RUN_TEST_SUIT('c', ddb_vos_tests_run); + RUN_TEST_SUIT('d', ddb_commands_tests_run); + RUN_TEST_SUIT('e', ddb_main_tests_run); + RUN_TEST_SUIT('f', ddb_commands_print_tests_run); + RUN_TEST_SUIT('g', ddb_path_tests_run); + +done: + ddb_fini(); + if (rc > 0) + printf("%d test(s) failed!\n", rc); + else + printf("All tests successful!\n"); + return rc; +} diff --git a/src/ddb/tests/ddb_test_driver.h b/src/ddb/tests/ddb_test_driver.h new file mode 100644 index 00000000000..229112d48d9 --- /dev/null +++ b/src/ddb/tests/ddb_test_driver.h @@ -0,0 +1,90 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#ifndef DAOS_DDB_TEST_DRIVER_H
+#define DAOS_DDB_TEST_DRIVER_H
+
+extern bool g_verbose;
+extern const char *g_uuids_str[10];
+extern const char *g_invalid_uuid_str;
+extern uuid_t g_uuids[10];
+extern daos_unit_oid_t g_oids[10];
+extern daos_unit_oid_t g_invalid_oid;
+extern char *g_dkeys_str[10];
+extern char *g_akeys_str[10];
+extern daos_key_t g_dkeys[10];
+extern daos_key_t g_akeys[10];
+extern daos_key_t g_invalid_key;
+extern daos_recx_t g_recxs[10];
+extern daos_recx_t g_invalid_recx;
+
+struct dt_vos_pool_ctx {
+	daos_handle_t dvt_poh;
+	uuid_t dvt_pool_uuid; /* ddb_test_pool_setup() fills in a default when this is null */
+	int dvt_fd; /* fd of the backing file opened by ddb_test_pool_setup() */
+	char dvt_pmem_file[128]; /* path of the VOS file; ddb_test_pool_setup() fills in a default when empty */
+	uint32_t dvt_cont_count; /* this and the counts below are recorded by dvt_insert_data() */
+	uint32_t dvt_obj_count;
+	uint32_t dvt_dkey_count;
+	uint32_t dvt_akey_count;
+};
+
+daos_unit_oid_t dvt_gen_uoid(uint32_t i);
+void dvt_vos_insert_recx(daos_handle_t coh, daos_unit_oid_t uoid, char *dkey_str, char *akey_str,
+			 daos_recx_t *recx, daos_epoch_t epoch);
+void
+dvt_vos_insert_single(daos_handle_t coh, daos_unit_oid_t uoid, char *dkey_str, char *akey_str,
+		      char *data_str, daos_epoch_t epoch);
+
+void dvt_iov_alloc(d_iov_t *iov, size_t len);
+void dvt_iov_alloc_str(d_iov_t *iov, const char *str);
+
+
+int ddb_test_setup_vos(void **state);
+int ddb_teardown_vos(void **state);
+
+int ddb_parse_tests_run(void);
+int ddb_vos_tests_run(void);
+int ddb_commands_tests_run(void);
+int ddb_main_tests_run(void);
+int ddb_cmd_options_tests_run(void);
+int ddb_commands_print_tests_run(void);
+int ddb_path_tests_run(void);
+
+/*
+ * Insert data into the pool. The cont, objs, ... parameters indicate how many of each to
+ * insert into its parent. If numbers are 0, then it will use a default number.
+ */
+void dvt_insert_data(daos_handle_t poh, uint32_t conts, uint32_t objs, uint32_t dkeys,
+		     uint32_t akeys, struct dt_vos_pool_ctx *tctx);
+
+int ddb_test_pool_setup(struct dt_vos_pool_ctx *tctx);
+
+extern uint32_t dvt_fake_print_called;
+extern bool dvt_fake_print_just_count;
+#define DVT_FAKE_PRINT_BUFFER_SIZE (1024)
+extern char dvt_fake_print_buffer[DVT_FAKE_PRINT_BUFFER_SIZE];
+int dvt_fake_print(const char *fmt, ...);
+void dvt_fake_print_reset(void);
+#define assert_printed_exact(str) assert_string_equal(str, dvt_fake_print_buffer)
+#define assert_printed_not_equal(str) assert_string_not_equal(str, dvt_fake_print_buffer)
+#define assert_printed_contains(str) assert_string_contains(dvt_fake_print_buffer, str)
+
+
+extern size_t dvt_fake_get_file_size_result;
+size_t dvt_fake_get_file_size(const char *path);
+
+extern bool dvt_fake_get_file_exists_result;
+bool dvt_fake_get_file_exists(const char *path);
+
+extern uint32_t dvt_fake_read_file_called;
+extern size_t dvt_fake_read_file_result;
+extern char dvt_fake_read_file_buf[64];
+size_t dvt_fake_read_file(const char *src_path, d_iov_t *contents);
+
+void dvt_vos_insert_2_records_with_dtx(daos_handle_t coh);
+void dvt_vos_insert_dtx_records(daos_handle_t coh, uint32_t nr, uint32_t committed_nr);
+
+#endif /* DAOS_DDB_TEST_DRIVER_H */
diff --git a/src/ddb/tests/ddb_vos_tests.c b/src/ddb/tests/ddb_vos_tests.c
new file mode 100644
index 00000000000..d398303ae98
--- /dev/null
+++ b/src/ddb/tests/ddb_vos_tests.c
@@ -0,0 +1,1129 @@
+/**
+ * (C) Copyright 2022-2023 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ddb_cmocka.h"
+#include "ddb_test_driver.h"
+
+/*
+ * The tests in this file depend on a VOS instance with a bunch of data written. The tests will
+ * verify that different parts of the VOS tree can be navigated/iterated. The fake handlers below record each visited item.
+ */
+
+static int fake_cont_handler_call_count;
+static struct ddb_cont fake_cont_handler_conts[64];
+int fake_cont_handler(struct ddb_cont *cont, void *args) /* like the handlers below: copy the item, bump the call count */
+{
+	assert_true(fake_cont_handler_call_count < ARRAY_SIZE(fake_cont_handler_conts));
+	fake_cont_handler_conts[fake_cont_handler_call_count] = *cont;
+	fake_cont_handler_call_count++;
+
+	return 0;
+}
+
+static int fake_obj_handler_call_count;
+static struct ddb_obj fake_obj_handler_objs[128];
+int fake_obj_handler(struct ddb_obj *obj, void *args)
+{
+	assert_true(fake_obj_handler_call_count < ARRAY_SIZE(fake_obj_handler_objs));
+	fake_obj_handler_objs[fake_obj_handler_call_count] = *obj;
+	fake_obj_handler_call_count++;
+
+	return 0;
+}
+
+static int fake_dkey_handler_call_count;
+static struct ddb_key fake_dkey_handler_dkeys[1024];
+int fake_dkey_handler(struct ddb_key *key, void *args)
+{
+	assert_true(fake_dkey_handler_call_count < ARRAY_SIZE(fake_dkey_handler_dkeys));
+	fake_dkey_handler_dkeys[fake_dkey_handler_call_count] = *key;
+	fake_dkey_handler_call_count++;
+
+	return 0;
+}
+
+static int fake_akey_handler_call_count;
+static struct ddb_key fake_akey_handler_akeys[2048 * 10];
+int fake_akey_handler(struct ddb_key *key, void *args)
+{
+	assert_true(fake_akey_handler_call_count < ARRAY_SIZE(fake_akey_handler_akeys));
+	fake_akey_handler_akeys[fake_akey_handler_call_count] = *key;
+	fake_akey_handler_call_count++;
+
+	return 0;
+}
+
+static int fake_sv_handler_call_count;
+static struct ddb_sv fake_sv_handler_svs[2048 * 10];
+int fake_sv_handler(struct ddb_sv *sv, void *args)
+{
+	assert_true((uint32_t)fake_sv_handler_call_count < ARRAY_SIZE(fake_sv_handler_svs));
+	fake_sv_handler_svs[fake_sv_handler_call_count] = *sv;
+	fake_sv_handler_call_count++;
+
+	return 0;
+}
+
+static int fake_array_handler_call_count;
+static struct ddb_array fake_array_handler_arrays[2048 * 10];
+int fake_array_handler(struct ddb_array *array, void *args)
+{
+	assert_true(fake_array_handler_call_count <
ARRAY_SIZE(fake_array_handler_arrays)); + fake_array_handler_arrays[fake_array_handler_call_count] = *array; + fake_array_handler_call_count++; + + return 0; +} + +static void +fake_call_counts_reset() +{ + fake_cont_handler_call_count = 0; + fake_obj_handler_call_count = 0; + fake_dkey_handler_call_count = 0; + fake_akey_handler_call_count = 0; + fake_sv_handler_call_count = 0; + fake_array_handler_call_count = 0; +} + +static struct vos_tree_handlers fake_handlers = { + .ddb_cont_handler = fake_cont_handler, + .ddb_obj_handler = fake_obj_handler, + .ddb_dkey_handler = fake_dkey_handler, + .ddb_akey_handler = fake_akey_handler, + .ddb_sv_handler = fake_sv_handler, + .ddb_array_handler = fake_array_handler, +}; + +#define expect_int_equal(a, b, rc) \ + do { \ + if ((a) != (b)) { \ + rc++; \ + print_error("%s:%d - %lu != %lu\n", __FILE__, __LINE__, \ + (uint64_t)(a), (uint64_t)(b)); \ + } \ + } while (0) + +#define assert_ddb_iterate(poh, cont_uuid, oid, dkey, akey, is_recx, recursive, expected_cont, \ + expected_obj, expected_dkey, expected_akey, \ + expected_sv, expected_array) \ + assert_success(__assert_ddb_iterate(poh, cont_uuid, oid, dkey, \ + akey, is_recx, recursive, expected_cont, expected_obj, \ + expected_dkey, expected_akey, expected_sv, expected_array)) +static int +__assert_ddb_iterate(daos_handle_t poh, uuid_t *cont_uuid, daos_unit_oid_t *oid, daos_key_t *dkey, + daos_key_t *akey, bool is_recx, bool recursive, uint32_t expected_cont, + uint32_t expected_obj, uint32_t expected_dkey, uint32_t expected_akey, + uint32_t expected_sv, uint32_t expected_array) +{ + int i; + int rc = 0; + struct dv_tree_path path = {0}; + + if (cont_uuid) + uuid_copy(path.vtp_cont, *cont_uuid); + if (oid) + path.vtp_oid = *oid; + if (dkey) + path.vtp_dkey = *dkey; + if (akey) + path.vtp_akey = *akey; + path.vtp_is_recx = is_recx; + + assert_success(dv_iterate(poh, &path, recursive, &fake_handlers, NULL, NULL)); + + expect_int_equal(expected_cont, fake_cont_handler_call_count, 
rc); + expect_int_equal(expected_obj, fake_obj_handler_call_count, rc); + expect_int_equal(expected_dkey, fake_dkey_handler_call_count, rc); + expect_int_equal(expected_akey, fake_akey_handler_call_count, rc); + expect_int_equal(expected_sv, fake_sv_handler_call_count, rc); + expect_int_equal(expected_array, fake_array_handler_call_count, rc); + + for (i = 0; i < expected_cont; i++) + expect_int_equal(i, fake_cont_handler_conts[i].ddbc_idx, rc); + + /* Even if a parent handler isn't seen it's because only children of the parent + * are listed. Always assume 1 parent. + */ + + /* In these tests the objs will always be evenly distributed in the conts */ + expected_cont = expected_cont == 0 ? 1 : expected_cont; + for (i = 0; i < expected_obj; i++) + expect_int_equal(i % (expected_obj / expected_cont), + fake_obj_handler_objs[i].ddbo_idx, rc); + + expected_obj = expected_obj == 0 ? 1 : expected_obj; + for (i = 0; i < expected_dkey; i++) + expect_int_equal(i % (expected_dkey / expected_obj), + fake_dkey_handler_dkeys[i].ddbk_idx, rc); + + expected_dkey = expected_dkey == 0 ? 
1 : expected_dkey; + for (i = 0; i < expected_akey; i++) + expect_int_equal(i % (expected_akey / expected_dkey), + fake_akey_handler_akeys[i].ddbk_idx, rc); + + fake_call_counts_reset(); + + return rc; +} + +static void +open_pool_test(void **state) +{ + daos_handle_t poh; + struct dt_vos_pool_ctx *tctx = *state; + + assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", &poh)); + + assert_success(dv_pool_open(tctx->dvt_pmem_file, &poh)); + assert_success(dv_pool_close(poh)); + + /* should be able to open again after closing */ + assert_success(dv_pool_open(tctx->dvt_pmem_file, &poh)); + assert_success(dv_pool_close(poh)); +} + +static void +list_items_test(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + + uint32_t cont_count = tctx->dvt_cont_count; + uint32_t obj_count = tctx->dvt_obj_count; + uint32_t dkey_count = tctx->dvt_dkey_count; + uint32_t akey_count = tctx->dvt_akey_count; + + /* + * The vos tree is created with equal number of children at each level. Meaning if + * cont_count is 10 and obj_count is 10, there are 10 objects for each cont, 100 + * in total. 
+ * + * Half of the akeys are single value and half are arrays + */ + + /* list containers */ + assert_ddb_iterate(poh, NULL, NULL, NULL, NULL, false, false, cont_count, 0, 0, 0, 0, 0); + assert_ddb_iterate(poh, NULL, NULL, NULL, NULL, false, true, + cont_count, + cont_count * obj_count, + cont_count * obj_count * dkey_count, + cont_count * obj_count * dkey_count * akey_count, + cont_count * obj_count * dkey_count * akey_count / 2, + cont_count * obj_count * dkey_count * akey_count / 2); + + /* list objects of a container */ + assert_ddb_iterate(poh, &g_uuids[0], NULL, NULL, NULL, false, false, + 0, obj_count, 0, 0, 0, 0); + assert_ddb_iterate(poh, &g_uuids[0], NULL, NULL, NULL, false, true, + 0, obj_count, + obj_count * dkey_count, + obj_count * dkey_count * akey_count, + obj_count * dkey_count * akey_count / 2, + obj_count * dkey_count * akey_count / 2); + + /* list dkeys of an object */ + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], NULL, NULL, false, false, + 0, 0, dkey_count, 0, 0, 0); + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], NULL, NULL, false, true, + 0, 0, dkey_count, dkey_count * akey_count, + dkey_count * akey_count / 2, + dkey_count * akey_count / 2); + + /* list akeys of a dkey */ + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], &g_dkeys[0], NULL, false, false, + 0, 0, 0, akey_count, 0, 0); + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], &g_dkeys[0], NULL, false, true, + 0, 0, 0, akey_count, akey_count / 2, akey_count / 2); + + /* list values in akeys */ + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], &g_dkeys[0], &g_akeys[0], true, false, + 0, 0, 0, 0, 0, 1); + assert_ddb_iterate(poh, &g_uuids[0], &g_oids[0], &g_dkeys[0], &g_akeys[1], false, true, + 0, 0, 0, 0, 1, 0); +} + +static void +get_cont_uuid_from_idx_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + uuid_t uuid; + uuid_t uuid_2; + int i; + + assert_rc_equal(-DER_NONEXIST, dv_get_cont_uuid(tctx->dvt_poh, 10000000, uuid)); + 
assert_success(dv_get_cont_uuid(tctx->dvt_poh, 0, uuid)); + for (i = 1; i < 5; i++) { + assert_success(dv_get_cont_uuid(tctx->dvt_poh, i, uuid_2)); + assert_uuid_not_equal(uuid, uuid_2); + } + + /* while containers aren't in the same order they were inserted (and the order can't + * be guaranteed), it should be the same order each time assuming no data is + * inserted/deleted. + */ + for (i = 0; i < 100; i++) { + assert_success(dv_get_cont_uuid(tctx->dvt_poh, 0, uuid_2)); + assert_uuid_equal(uuid, uuid_2); + } +} + +static void +get_dkey_from_idx_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_unit_oid_t uoid = {0}; + int i; + + daos_handle_t coh = DAOS_HDL_INVAL; + daos_key_t dkey; + daos_key_t dkey2; + + assert_rc_equal(-DER_INVAL, dv_get_dkey(coh, uoid, 0, &dkey)); + vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh); + assert_rc_equal(-DER_INVAL, dv_get_dkey(coh, uoid, 0, &dkey)); + uoid = g_oids[0]; + + assert_success(dv_get_dkey(coh, uoid, 0, &dkey)); + i = 1; + while (SUCCESS(dv_get_dkey(coh, uoid, i, &dkey2))) { + assert_string_not_equal(dkey.iov_buf, dkey2.iov_buf); + i++; + daos_iov_free(&dkey2); + } + + for (i = 0; i < 100; i++) { + assert_success(dv_get_dkey(coh, uoid, 0, &dkey2)); + assert_key_equal(dkey, dkey2); + daos_iov_free(&dkey2); + } + daos_iov_free(&dkey); + + vos_cont_close(coh); +} + +static void +get_akey_from_idx_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_unit_oid_t uoid = {0}; + daos_handle_t coh = DAOS_HDL_INVAL; + daos_key_t dkey = {0}; + daos_key_t akey = {0}; + daos_key_t akey2 = {0}; + int i; + + assert_rc_equal(-DER_INVAL, dv_get_akey(coh, uoid, &dkey, 0, &akey)); + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + assert_rc_equal(-DER_INVAL, dv_get_akey(coh, uoid, &dkey, 0, &akey)); + uoid = g_oids[0]; + assert_rc_equal(-DER_NONEXIST, dv_get_akey(coh, uoid, &dkey, 0, &akey)); + dv_get_dkey(coh, uoid, 0, &dkey); + + assert_success(dv_get_akey(coh, uoid, &dkey, 0, 
&akey)); + i = 1; + while (SUCCESS(dv_get_dkey(coh, uoid, i, &akey2))) { + assert_string_not_equal(akey.iov_buf, akey2.iov_buf); + i++; + daos_iov_free(&akey2); + } + + for (i = 0; i < 100; i++) { + assert_success(dv_get_akey(coh, uoid, &dkey, 0, &akey2)); + assert_memory_equal(akey.iov_buf, akey2.iov_buf, akey.iov_len); + daos_iov_free(&akey2); + } + daos_iov_free(&dkey); + daos_iov_free(&akey); + + vos_cont_close(coh); +} + +static void +get_recx_from_idx_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_unit_oid_t uoid = {0}; + daos_handle_t coh = DAOS_HDL_INVAL; + daos_key_t dkey = {0}; + daos_key_t akey = {0}; + daos_recx_t recx = {0}; + + assert_rc_equal(-DER_INVAL, dv_get_recx(coh, uoid, &dkey, &akey, 0, &recx)); + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + assert_rc_equal(-DER_INVAL, dv_get_recx(coh, uoid, &dkey, &akey, 0, &recx)); + dv_get_object_oid(coh, 0, &uoid); + assert_rc_equal(-DER_NONEXIST, dv_get_recx(coh, uoid, &dkey, &akey, 0, &recx)); + dv_get_dkey(coh, uoid, 0, &dkey); + assert_rc_equal(-DER_NONEXIST, dv_get_recx(coh, uoid, &dkey, &akey, 0, &recx)); + dv_get_akey(coh, uoid, &dkey, 0, &akey); + assert_success(dv_get_recx(coh, uoid, &dkey, &akey, 0, &recx)); + daos_iov_free(&dkey); + daos_iov_free(&akey); + + vos_cont_close(coh); +} + +static int fake_dump_superblock_cb_called; +static struct ddb_superblock fake_dump_superblock_cb_sb; +static int +fake_dump_superblock_cb(void *cb_arg, struct ddb_superblock *sb) +{ + fake_dump_superblock_cb_called++; + fake_dump_superblock_cb_sb = *sb; + + return 0; +} + +static void +get_superblock_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + assert_rc_equal(-DER_INVAL, dv_superblock(DAOS_HDL_INVAL, + fake_dump_superblock_cb, NULL)); + + assert_success(dv_superblock(tctx->dvt_poh, fake_dump_superblock_cb, NULL)); + assert_int_equal(1, fake_dump_superblock_cb_called); + + /* just do some basics to verify got a valid pool df */ + 
assert_true(fake_dump_superblock_cb_sb.dsb_durable_format_version); +} + +static void +obj_id_2_ddb_test(void **state) +{ + struct ddb_obj obj = {0}; + daos_obj_id_t oid = {0}; + + daos_obj_set_oid(&oid, DAOS_OT_MULTI_HASHED, OR_RP_2, 2, 0); + + dv_oid_to_obj(oid, &obj); + + assert_int_equal(2, obj.ddbo_nr_grps); + assert_string_equal("DAOS_OT_MULTI_HASHED", obj.ddbo_otype_str); +} + + +static uint32_t fake_dump_value_cb_called; +static d_iov_t fake_dump_value_cb_value; +static uint8_t fake_dump_value_cb_value_buf[128]; +static int +fake_dump_value_cb(void *cb_args, d_iov_t *value) +{ + fake_dump_value_cb_called++; + assert_true(value->iov_len <= ARRAY_SIZE(fake_dump_value_cb_value_buf)); + fake_dump_value_cb_value = *value; + fake_dump_value_cb_value.iov_buf = fake_dump_value_cb_value_buf; + memcpy(fake_dump_value_cb_value_buf, value->iov_buf, value->iov_len); + return 0; +} + +static int +test_dump_value(daos_handle_t poh, uuid_t cont_uuid, daos_unit_oid_t oid, daos_key_t *dkey, + daos_key_t *akey, daos_recx_t *recx, dv_dump_value_cb dump_cb, void *cb_arg) +{ + struct dv_tree_path path = {0}; + + uuid_copy(path.vtp_cont, cont_uuid); + path.vtp_oid = oid; + path.vtp_dkey = *dkey; + path.vtp_akey = *akey; + if (recx) + path.vtp_recx = *recx; + + return dv_dump_value(poh, &path, dump_cb, cb_arg); + +} + +static void +get_value_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_recx_t recx = {.rx_idx = 0, .rx_nr = 10}; + + /* first akey is a recx */ + assert_success(test_dump_value(tctx->dvt_poh, g_uuids[0], g_oids[0], &g_dkeys[0], + &g_akeys[0], &recx, fake_dump_value_cb, NULL)); + + assert_int_equal(1, fake_dump_value_cb_called); + assert_non_null(fake_dump_value_cb_value.iov_buf); + assert_true(fake_dump_value_cb_value.iov_len > 0); + + /* second akey is a single value */ + fake_dump_value_cb_called = 0; + assert_success(test_dump_value(tctx->dvt_poh, g_uuids[0], g_oids[0], &g_dkeys[0], + &g_akeys[1], NULL, fake_dump_value_cb, NULL)); + + 
assert_int_equal(1, fake_dump_value_cb_called); + assert_non_null(fake_dump_value_cb_value.iov_buf); + assert_true(fake_dump_value_cb_value.iov_len > 0); +} + +static uint32_t fake_dump_ilog_entry_called; +static int +fake_dump_ilog_entry(void *cb_arg, struct ddb_ilog_entry *entry) +{ + fake_dump_ilog_entry_called++; + return 0; +} + +static void +get_obj_ilog_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t coh; + + daos_unit_oid_t null_oid = {0}; + daos_unit_oid_t bad_oid = {.id_pub.lo = 1}; + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + + assert_rc_equal(-DER_INVAL, dv_get_obj_ilog_entries(DAOS_HDL_INVAL, null_oid, + fake_dump_ilog_entry, NULL)); + assert_rc_equal(-DER_INVAL, dv_get_obj_ilog_entries(DAOS_HDL_INVAL, g_oids[0], + fake_dump_ilog_entry, NULL)); + assert_rc_equal(-DER_INVAL, dv_get_obj_ilog_entries(coh, null_oid, + fake_dump_ilog_entry, NULL)); + assert_rc_equal(-DER_INVAL, dv_get_obj_ilog_entries(coh, bad_oid, + fake_dump_ilog_entry, NULL)); + + assert_success(dv_get_obj_ilog_entries(coh, g_oids[0], fake_dump_ilog_entry, NULL)); + + assert_int_equal(1, fake_dump_ilog_entry_called); + + vos_cont_close(coh); +} + +static void +abort_obj_ilog_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t coh = {0}; + daos_unit_oid_t null_oid = {0}; + + fake_dump_ilog_entry_called = 0; + + /* error handling */ + assert_rc_equal(-DER_INVAL, dv_process_obj_ilog_entries(coh, null_oid, DDB_ILOG_OP_ABORT)); + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + + /* First make sure there is an ilog to rm */ + assert_success(dv_get_obj_ilog_entries(coh, g_oids[0], fake_dump_ilog_entry, NULL)); + assert_int_equal(1, fake_dump_ilog_entry_called); + fake_dump_ilog_entry_called = 0; + + /* Abort the ilogs */ + assert_success(dv_process_obj_ilog_entries(coh, g_oids[0], DDB_ILOG_OP_ABORT)); + + /* Now should not be any ilog entries */ + assert_success(dv_get_obj_ilog_entries(coh, 
g_oids[0], fake_dump_ilog_entry, NULL)); + assert_int_equal(0, fake_dump_ilog_entry_called); + + vos_cont_close(coh); +} + +static void +get_dkey_ilog_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t coh; + daos_unit_oid_t null_oid = {0}; + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + + assert_rc_equal(-DER_INVAL, dv_get_key_ilog_entries(DAOS_HDL_INVAL, null_oid, NULL, NULL, + fake_dump_ilog_entry, NULL)); + + fake_dump_ilog_entry_called = 0; + assert_success(dv_get_key_ilog_entries(coh, g_oids[1], &g_dkeys[0], NULL, + fake_dump_ilog_entry, + NULL)); + assert_int_equal(1, fake_dump_ilog_entry_called); + + fake_dump_ilog_entry_called = 0; + assert_success(dv_get_key_ilog_entries(coh, g_oids[1], &g_dkeys[0], &g_akeys[0], + fake_dump_ilog_entry, + NULL)); + assert_int_equal(1, fake_dump_ilog_entry_called); + fake_dump_ilog_entry_called = 0; + + vos_cont_close(coh); +} + +static void +abort_dkey_ilog_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t coh; + daos_unit_oid_t null_oid = {0}; + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[1], &coh)); + + assert_invalid(dv_process_key_ilog_entries(DAOS_HDL_INVAL, null_oid, NULL, NULL, + DDB_ILOG_OP_UNKNOWN)); + + + /* akey */ + assert_success(dv_get_key_ilog_entries(coh, g_oids[0], &g_dkeys[0], &g_akeys[0], + fake_dump_ilog_entry, NULL)); + assert_int_equal(1, fake_dump_ilog_entry_called); + + assert_success(dv_process_key_ilog_entries(coh, g_oids[0], &g_dkeys[0], &g_akeys[0], + DDB_ILOG_OP_ABORT)); + + fake_dump_ilog_entry_called = 0; + assert_success(dv_get_key_ilog_entries(coh, g_oids[0], &g_dkeys[0], &g_akeys[0], + fake_dump_ilog_entry, NULL)); + assert_int_equal(0, fake_dump_ilog_entry_called); + + /* dkey */ + assert_success(dv_get_key_ilog_entries(coh, g_oids[0], &g_dkeys[0], NULL, + fake_dump_ilog_entry, NULL)); + assert_int_equal(1, fake_dump_ilog_entry_called); + + assert_success(dv_process_key_ilog_entries(coh, g_oids[0], 
&g_dkeys[0], NULL, + DDB_ILOG_OP_ABORT)); + + fake_dump_ilog_entry_called = 0; + assert_success(dv_get_key_ilog_entries(coh, g_oids[0], &g_dkeys[0], NULL, + fake_dump_ilog_entry, NULL)); + assert_int_equal(0, fake_dump_ilog_entry_called); + + vos_cont_close(coh); +} + +int committed_entry_handler_called; +struct dv_dtx_committed_entry committed_entry_handler_entry; +static int +committed_entry_handler(struct dv_dtx_committed_entry *entry, void *cb_arg) +{ + committed_entry_handler_called++; + committed_entry_handler_entry = *entry; + + return 0; +} + +int active_entry_handler_called; +struct dv_dtx_active_entry active_entry_handler_entry; +static int +active_entry_handler(struct dv_dtx_active_entry *entry, void *cb_arg) +{ + active_entry_handler_called++; + active_entry_handler_entry = *entry; + + return 0; +} + +static void +get_dtx_tables_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t coh = DAOS_HDL_INVAL; + + assert_rc_equal(-DER_INVAL, dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL)); + assert_int_equal(0, committed_entry_handler_called); + + assert_rc_equal(-DER_INVAL, dv_dtx_get_act_table(coh, active_entry_handler, NULL)); + assert_int_equal(0, active_entry_handler_called); + + assert_success(vos_cont_open(tctx->dvt_poh, g_uuids[0], &coh)); + + dvt_vos_insert_2_records_with_dtx(coh); + + assert_success(dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL)); + assert_int_equal(1, committed_entry_handler_called); + + assert_success(dv_dtx_get_act_table(coh, active_entry_handler, NULL)); + assert_int_equal(1, active_entry_handler_called); + + vos_cont_close(coh); +} + +static void +verify_correct_params_for_update_value_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + struct dv_tree_path vtp = {}; + d_iov_t value_iov = {0}; + + assert_rc_equal(-DER_INVAL, dv_update(DAOS_HDL_INVAL, &vtp, &value_iov)); + assert_rc_equal(-DER_INVAL, dv_update(poh, &vtp, &value_iov)); 
+ + uuid_copy(vtp.vtp_cont, g_uuids[3]); + vtp.vtp_oid = g_oids[0]; + vtp.vtp_dkey = g_dkeys[0]; + vtp.vtp_akey = g_akeys[0]; + assert_rc_equal(-DER_INVAL, dv_update(poh, &vtp, &value_iov)); +} + +static void +assert_update_existing_path(daos_handle_t poh, struct dv_tree_path *vtp) +{ + d_iov_t value_iov = {0}; + char value_buf[256]; + + /* First get the value_buf using dump_value then use it to create an updated value */ + assert_success(dv_dump_value(poh, vtp, fake_dump_value_cb, NULL)); + snprintf(value_buf, 256, "Updated: %s", fake_dump_value_cb_value_buf); + + d_iov_set(&value_iov, value_buf, strlen(value_buf)); + + /* if it's an array path, update so will be same length as new value */ + if (vtp->vtp_recx.rx_nr > 0) + vtp->vtp_recx.rx_nr = value_iov.iov_len; + assert_success(dv_update(poh, vtp, &value_iov)); + + /* Verify that after loading the value_buf, the same value_buf is dumped */ + assert_success(dv_dump_value(poh, vtp, fake_dump_value_cb, NULL)); + assert_key_equal(value_iov, fake_dump_value_cb_value); +} + +static void +update_value_to_modify_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + struct dv_tree_path vtp = {}; + daos_handle_t coh; + + + uuid_copy(vtp.vtp_cont, g_uuids[3]); + vtp.vtp_oid = g_oids[0]; + vtp.vtp_dkey = g_dkeys[0]; + vtp.vtp_akey = g_akeys[1]; /* single value type */ + + assert_update_existing_path(poh, &vtp); + + vtp.vtp_akey = g_akeys[0]; /* array value type */ + dv_cont_open(poh, vtp.vtp_cont, &coh); + dv_get_recx(coh, vtp.vtp_oid, &vtp.vtp_dkey, &vtp.vtp_akey, 0, &vtp.vtp_recx); + dv_cont_close(&coh); + assert_update_existing_path(poh, &vtp); +} + +static void +assert_update_new_path(daos_handle_t poh, struct dv_tree_path *vtp) +{ + d_iov_t value_iov = {0}; + char *value_buf = "A New value"; + + /* First check that the value doesn't exist */ + memset(fake_dump_value_cb_value_buf, 0, ARRAY_SIZE(fake_dump_value_cb_value_buf)); + assert_success(dv_dump_value(poh, vtp, 
fake_dump_value_cb, NULL)); + assert_int_equal(0, fake_dump_value_cb_value_buf[0]); + + d_iov_set(&value_iov, value_buf, strlen(value_buf)); + + assert_success(dv_update(poh, vtp, &value_iov)); + + /* Verify that after loading the value_buf, the same value_buf is dumped */ + assert_success(dv_dump_value(poh, vtp, fake_dump_value_cb, NULL)); + assert_key_equal(value_iov, fake_dump_value_cb_value); +} + +static void +update_value_to_insert_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + struct dv_tree_path vtp = {}; + + uuid_copy(vtp.vtp_cont, g_uuids[3]); + /* + * Create a new object with dkey & akey. If this succeeds, we assume that could also create + * a new dkey within an existing oid, etc + */ + vtp.vtp_oid = dvt_gen_uoid(999); + vtp.vtp_dkey = g_dkeys[0]; + vtp.vtp_akey = g_akeys[0]; + + assert_update_new_path(poh, &vtp); +} + +static void +clear_committed_table(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + daos_handle_t coh; + + dv_cont_open(poh, g_uuids[5], &coh); + + dvt_vos_insert_2_records_with_dtx(coh); + + assert_int_equal(1, dv_dtx_clear_cmt_table(coh)); + + committed_entry_handler_called = 0; + dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL); + + assert_int_equal(0, committed_entry_handler_called); + + dv_cont_close(&coh); +} + +static void +dtx_commit_active_table(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + daos_handle_t coh; + + dv_cont_open(poh, g_uuids[6], &coh); + + dvt_vos_insert_dtx_records(coh, 2, 0); + + /* Make sure there are no committed entries when starting */ + dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL); + assert_int_equal(0, committed_entry_handler_called); + + /* get a dtx_id. 
entry_handler_committed_entry is set when dv_dtx_get_act_table is called */ + dv_dtx_get_act_table(coh, active_entry_handler, NULL); + assert_int_equal(2, active_entry_handler_called); + assert_int_equal(1, dv_dtx_commit_active_entry(coh, &active_entry_handler_entry.ddtx_id)); + + /* Should be 1 committed entry in the table now */ + dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL); + assert_int_equal(1, committed_entry_handler_called); + + /* Should still be 1 active */ + active_entry_handler_called = 0; + dv_dtx_get_act_table(coh, active_entry_handler, NULL); + assert_int_equal(1, active_entry_handler_called); + + dv_cont_close(&coh); +} + +static void +dtx_abort_active_table(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + daos_handle_t coh; + + dv_cont_open(poh, g_uuids[7], &coh); + + dvt_vos_insert_dtx_records(coh, 2, 0); + + /* get a dtx_id. entry_handler_committed_entry is set when dv_dtx_get_act_table is called */ + dv_dtx_get_act_table(coh, active_entry_handler, NULL); + assert_int_equal(2, active_entry_handler_called); + assert_success(dv_dtx_abort_active_entry(coh, &active_entry_handler_entry.ddtx_id)); + + /* Should still be 0 committed entries in table */ + dv_dtx_get_cmt_table(coh, committed_entry_handler, NULL); + assert_int_equal(0, committed_entry_handler_called); + + /* Should still be 1 active */ + active_entry_handler_called = 0; + dv_dtx_get_act_table(coh, active_entry_handler, NULL); + assert_int_equal(1, active_entry_handler_called); + + dv_cont_close(&coh); +} + +static void +path_verify(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct dv_indexed_tree_path itp = {0}; + char path[256]; + + /* empty path is fine */ + assert_success(itp_parse("", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + itp_free(&itp); + + /* + * Container + */ + /* set to an index */ + assert_success(itp_parse("[0]", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + 
assert_true(itp_has_cont_complete(&itp)); + itp_free(&itp); + /* set to a uuid */ + sprintf(path, "/%s", g_uuids_str[3]); + assert_success(itp_parse(path, &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_cont_complete(&itp)); + itp_free(&itp); + + /* parses fine, but isn't found */ + assert_success(itp_parse("[999]", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_CONT); + assert_false(itp_has_cont_complete(&itp)); + itp_free(&itp); + assert_success(itp_parse("/99999999-9999-9999-9999-999999999999", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_CONT); + assert_false(itp_has_cont_complete(&itp)); + itp_free(&itp); + + /* + * object + */ + /* set to an index */ + assert_success(itp_parse("[0]/[0]", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_obj_complete(&itp)); + itp_free(&itp); + /* set to an oid */ + sprintf(path, "/%s/"DF_UOID, g_uuids_str[3], DP_UOID(g_oids[0])); + assert_success(itp_parse(path, &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_obj_complete(&itp)); + itp_free(&itp); + /* parses fine, but isn't found */ + assert_success(itp_parse("[0]/[999]", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_OBJ); + assert_false(itp_has_obj_complete(&itp)); + itp_free(&itp); + assert_success(itp_parse("[0]/99.1.0.0", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_OBJ); + assert_false(itp_has_obj_complete(&itp)); + itp_free(&itp); + + /* + * dkey + */ + /* set to an index */ + assert_success(itp_parse("[0]/[0]/[0]", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_dkey_complete(&itp)); + itp_free(&itp); + /* set to key */ + sprintf(path, "/%s/"DF_UOID"/%s", g_uuids_str[3], DP_UOID(g_oids[0]), + (char *)g_dkeys[0].iov_buf); + assert_success(itp_parse(path, &itp)); + 
assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_dkey_complete(&itp)); + itp_free(&itp); + /* parses fine, but isn't found */ + assert_success(itp_parse("[0]/[0]/[999]", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_DKEY); + assert_false(itp_has_dkey_complete(&itp)); + itp_free(&itp); + assert_success(itp_parse("[0]/[0]/invalid_dkey", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_DKEY); + assert_false(itp_has_dkey_complete(&itp)); + itp_free(&itp); + + /* + * akey + */ + /* set to an index */ + assert_success(itp_parse("[0]/[0]/[0]/[0]", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_akey_complete(&itp)); + itp_free(&itp); + /* set to key */ + sprintf(path, "/%s/"DF_UOID"/%s/%s", g_uuids_str[3], DP_UOID(g_oids[0]), + (char *)g_dkeys[0].iov_buf, + (char *)g_akeys[0].iov_buf); + assert_success(itp_parse(path, &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_akey_complete(&itp)); + itp_free(&itp); + /* parses fine, but isn't found */ + assert_success(itp_parse("[0]/[0]/[0]/[999]", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_AKEY); + assert_false(itp_has_akey_complete(&itp)); + itp_free(&itp); + assert_success(itp_parse("[0]/[0]/[0]/invalid_akey", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_AKEY); + assert_false(itp_has_akey_complete(&itp)); + itp_free(&itp); + + /* + * recx + */ + /* set to an index */ + assert_success(itp_parse("[3]/[0]/[0]/[0]/[0]", &itp)); + assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_recx_complete(&itp)); + itp_free(&itp); + /* set to key */ + sprintf(path, "/%s/"DF_UOID"/%s/%s/"DF_DDB_RECX, g_uuids_str[3], DP_UOID(g_oids[0]), + (char *)g_dkeys[0].iov_buf, + (char *)g_akeys[0].iov_buf, + DP_DDB_RECX(g_recxs[0])); + assert_success(itp_parse(path, &itp)); + 
assert_success(dv_path_verify(tctx->dvt_poh, &itp)); + assert_true(itp_has_recx_complete(&itp)); + itp_free(&itp); + /* parses fine, but isn't found */ + assert_success(itp_parse("[0]/[0]/[0]/[0]/[999]", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_RECX); + assert_false(itp_has_recx_complete(&itp)); + itp_free(&itp); + assert_success(itp_parse("[0]/[0]/[0]/[0]/{99-100}", &itp)); + assert_rc_equal(dv_path_verify(tctx->dvt_poh, &itp), -DDBER_INVALID_RECX); + assert_false(itp_has_recx_complete(&itp)); + itp_free(&itp); +} + +#define DELETE_SUCCESS(poh, vtp) assert_success(dv_delete(poh, &vtp)) +static void +delete_path_parts_tests(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + daos_handle_t poh = tctx->dvt_poh; + daos_handle_t coh; + struct dv_tree_path vtp = {0}; + uuid_t cont_test; + daos_unit_oid_t uoid_test = {0}; + daos_key_t dkey_test = {0}; + daos_key_t akey_test = {0}; + + /* Don't allow empty path */ + assert_rc_equal(-DER_INVAL, dv_delete(poh, &vtp)); + + dv_get_cont_uuid(poh, 0, vtp.vtp_cont); + DELETE_SUCCESS(poh, vtp); + dv_get_cont_uuid(poh, 0, cont_test); + assert_uuid_not_equal(vtp.vtp_cont, cont_test); + /* shouldn't be able to delete same container */ + assert_rc_equal(-DER_NONEXIST, dv_delete(poh, &vtp)); + + /* + * Remaining deletes happen within a container, so open the container to get the + * VOS path part identifier + */ + dv_get_cont_uuid(poh, 0, vtp.vtp_cont); + assert_success(dv_cont_open(poh, vtp.vtp_cont, &coh)); + + /* + * Delete an object + * get oid from index 0. 
This will be deleted, so should not exist after + */ + assert_success(dv_get_object_oid(coh, 0, &vtp.vtp_oid)); + DELETE_SUCCESS(poh, vtp); + /* index 0 should not be same oid now */ + assert_success(dv_get_object_oid(coh, 0, &uoid_test)); + assert_oid_not_equal(vtp.vtp_oid.id_pub, uoid_test.id_pub); + /* Shouldn't be able to delete the same object again */ + assert_rc_equal(-DER_NONEXIST, dv_delete(poh, &vtp)); + + /* + * delete dkey + */ + vtp.vtp_oid = uoid_test; /* reset uoid_before to oid that hasn't been deleted */ + dv_get_dkey(coh, vtp.vtp_oid, 0, &vtp.vtp_dkey); + DELETE_SUCCESS(poh, vtp); + /* should still have the object */ + assert_success(dv_get_object_oid(coh, 0, &uoid_test)); + assert_oid_equal(vtp.vtp_oid.id_pub, uoid_test.id_pub); + daos_iov_free(&vtp.vtp_dkey); + + dv_get_dkey(coh, vtp.vtp_oid, 0, &dkey_test); + assert_key_not_equal(vtp.vtp_dkey, dkey_test); + + /* + * delete akey + */ + vtp.vtp_dkey = dkey_test; + dv_get_akey(coh, vtp.vtp_oid, &vtp.vtp_dkey, 0, &vtp.vtp_akey); + DELETE_SUCCESS(poh, vtp); + /* should still have the object and dkey */ + assert_success(dv_get_object_oid(coh, 0, &uoid_test)); + assert_oid_equal(vtp.vtp_oid.id_pub, uoid_test.id_pub); + daos_iov_free(&vtp.vtp_akey); + + dv_get_dkey(coh, vtp.vtp_oid, 0, &dkey_test); + assert_key_equal(vtp.vtp_dkey, dkey_test); + dv_get_akey(coh, vtp.vtp_oid, &vtp.vtp_dkey, 0, &akey_test); + assert_key_not_equal(vtp.vtp_akey, akey_test); + daos_iov_free(&vtp.vtp_dkey); + daos_iov_free(&akey_test); + daos_iov_free(&dkey_test); + + dv_cont_close(&coh); +} + +static int +dv_suit_setup(void **state) +{ + return ddb_test_setup_vos(state); +} + +static int +dv_suit_teardown(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + if (tctx == NULL) + fail_msg("Test context wasn't setup. 
Possible issue in test setup\n"); + + ddb_teardown_vos(state); + + return 0; +} + +static int +dv_test_setup(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + active_entry_handler_called = 0; + committed_entry_handler_called = 0; + assert_success(dv_pool_open(tctx->dvt_pmem_file, &tctx->dvt_poh)); + return 0; +} + +static int +dv_test_teardown(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + + assert_success(dv_pool_close(tctx->dvt_poh)); + return 0; +} + + +/* + * All these tests use the same VOS tree that is created at suit_setup. Therefore, tests + * that modify the state of the tree (delete, add, etc) should be run after all others. + */ +#define TEST(x) { #x, x, dv_test_setup, dv_test_teardown } +const struct CMUnitTest dv_test_cases[] = { + { "open_pool", open_pool_test, NULL, NULL }, /* don't want this test to run with setup */ + TEST(list_items_test), + TEST(get_cont_uuid_from_idx_tests), + TEST(get_dkey_from_idx_tests), + TEST(get_akey_from_idx_tests), + TEST(get_recx_from_idx_tests), + TEST(get_value_tests), + TEST(get_obj_ilog_tests), + TEST(abort_obj_ilog_tests), + TEST(get_dkey_ilog_tests), + TEST(abort_dkey_ilog_tests), + TEST(get_superblock_tests), + TEST(obj_id_2_ddb_test), + TEST(get_dtx_tables_tests), + TEST(delete_path_parts_tests), + TEST(verify_correct_params_for_update_value_tests), + TEST(update_value_to_modify_tests), + TEST(update_value_to_insert_tests), + TEST(clear_committed_table), + TEST(dtx_commit_active_table), + TEST(dtx_abort_active_table), + TEST(path_verify), +}; + +int +ddb_vos_tests_run() +{ + return cmocka_run_group_tests_name("DDB VOS Interface Tests", dv_test_cases, + dv_suit_setup, dv_suit_teardown); +} diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index a23f35e3fdd..95a2ef37c7b 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -698,8 +698,10 @@ dtx_batched_commit(void *arg) struct dtx_stat stat = { 0 }; int sleep_time = 50; /* ms */ - if 
(d_list_empty(&dmi->dmi_dtx_batched_cont_open_list)) + if (d_list_empty(&dmi->dmi_dtx_batched_cont_open_list)) { + sleep_time = 500; goto check; + } if (DAOS_FAIL_CHECK(DAOS_DTX_NO_BATCHED_CMT) || DAOS_FAIL_CHECK(DAOS_DTX_NO_COMMITTABLE)) diff --git a/src/dtx/dtx_resync.c b/src/dtx/dtx_resync.c index c93f963ca24..9e9f47537d2 100644 --- a/src/dtx/dtx_resync.c +++ b/src/dtx/dtx_resync.c @@ -756,12 +756,16 @@ dtx_resync_one(void *data) if (child == NULL) D_GOTO(out, rc = -DER_NONEXIST); + if (unlikely(child->spc_no_storage)) + D_GOTO(put, rc = 0); + cb_arg.arg = *arg; param.ip_hdl = child->spc_hdl; param.ip_flags = VOS_IT_FOR_MIGRATION; rc = vos_iterate(&param, VOS_ITER_COUUID, false, &anchor, container_scan_cb, NULL, &cb_arg, NULL); +put: ds_pool_child_put(child); out: D_DEBUG(DB_TRACE, DF_UUID" iterate pool done: rc %d\n", diff --git a/src/engine/SConscript b/src/engine/SConscript index ceb00a409d0..69a2e1624bc 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -10,6 +10,7 @@ def scons(): denv = env.Clone() + denv.AppendUnique(CPPPATH=[Dir('..').srcnode()]) denv.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) libraries = ['daos_common_pmem', 'gurt', 'cart', 'vos_srv'] libraries += ['bio', 'dl', 'uuid', 'pthread', 'abt'] diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 8f4763a0dfc..f4d8ddb1fa9 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -155,7 +155,7 @@ dss_drpc_call(int32_t module, int32_t method, void *req, size_t req_size, /* Notify daos_server that we are ready (e.g., to receive dRPC requests). 
*/ int -drpc_notify_ready(void) +drpc_notify_ready(bool check_mode) { Srv__NotifyReadyReq req = SRV__NOTIFY_READY_REQ__INIT; uint8_t *reqb; @@ -177,6 +177,7 @@ drpc_notify_ready(void) req.drpclistenersock = drpc_listener_socket_path; req.instanceidx = dss_instance_idx; req.ntgts = dss_tgt_nr; + req.check_mode = check_mode; reqb_size = srv__notify_ready_req__get_packed_size(&req); D_ALLOC(reqb, reqb_size); diff --git a/src/engine/drpc_internal.h b/src/engine/drpc_internal.h index 085d92ef664..eef8bbb9243 100644 --- a/src/engine/drpc_internal.h +++ b/src/engine/drpc_internal.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2021 Intel Corporation. + * (C) Copyright 2018-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -123,6 +123,6 @@ int drpc_init(void); /** Finalize the dRPC client. */ void drpc_fini(void); -int drpc_notify_ready(void); +int drpc_notify_ready(bool check_mode); #endif /* __DAOS_DRPC_INTERNAL_H__ */ diff --git a/src/engine/drpc_ras.c b/src/engine/drpc_ras.c index 041493212dd..2361cd6acff 100644 --- a/src/engine/drpc_ras.c +++ b/src/engine/drpc_ras.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -16,6 +16,7 @@ #include "event.pb-c.h" #include "drpc_internal.h" #include "srv_internal.h" +#include "srv.pb-c.h" static void free_event(Shared__RASEvent *evt) @@ -365,3 +366,252 @@ ds_notify_swim_rank_dead(d_rank_t rank, uint64_t incarnation) NULL /* pool */, NULL /* cont */, NULL /* objid */, NULL /* ctlop */, &evt, false /* wait_for_resp */); } + +void +ds_chk_free_pool_list(struct chk_list_pool *clp, uint32_t nr) +{ + int i; + + for (i = 0; i < nr; i++) { + D_FREE(clp[i].clp_label); + d_rank_list_free(clp[i].clp_svcreps); + } + + D_FREE(clp); +} + +int +ds_chk_listpool_upcall(struct chk_list_pool **clp) +{ + struct chk_list_pool *pools = NULL; + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Srv__CheckListPoolReq req = SRV__CHECK_LIST_POOL_REQ__INIT; + Srv__CheckListPoolResp *respb = NULL; + Drpc__Response *dresp = NULL; + uint8_t *reqb = NULL; + size_t size; + int rc; + int i; + + size = srv__check_list_pool_req__get_packed_size(&req); + D_ALLOC(reqb, size); + if (reqb == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + rc = srv__check_list_pool_req__pack(&req, reqb); + if (rc < 0) + goto out_req; + + rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_CHK_LIST_POOL, reqb, size, 0, &dresp); + if (rc != 0) + goto out_req; + + if (dresp->status != DRPC__STATUS__SUCCESS) { + D_ERROR("Received erroneous dRPC response for list pool: %d\n", dresp->status); + D_GOTO(out_dresp, rc = -DER_IO); + } + + respb = srv__check_list_pool_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data); + if (alloc.oom || respb == NULL) + D_GOTO(out_dresp, rc = -DER_NOMEM); + + if (respb->status != 0) + D_GOTO(out_respb, rc = respb->status); + + D_ALLOC_ARRAY(pools, respb->n_pools); + if (pools == NULL) + D_GOTO(out_respb, rc = -DER_NOMEM); + + for (i = 0; i < respb->n_pools; i++) { + rc = uuid_parse(respb->pools[i]->uuid, pools[i].clp_uuid); + if (rc != 0) { + D_ERROR("Failed to parse uuid %s: %d\n", 
respb->pools[i]->uuid, rc); + D_GOTO(out_parse, rc); + } + + D_STRNDUP(pools[i].clp_label, respb->pools[i]->label, DAOS_PROP_LABEL_MAX_LEN); + if (pools[i].clp_label == NULL) + D_GOTO(out_parse, rc = -DER_NOMEM); + + pools[i].clp_svcreps = uint32_array_to_rank_list(respb->pools[i]->svcreps, + respb->pools[i]->n_svcreps); + if (pools[i].clp_svcreps == NULL) + D_GOTO(out_parse, rc = -DER_NOMEM); + } + + rc = respb->n_pools; + *clp = pools; + pools = NULL; + +out_parse: + if (pools != NULL) + ds_chk_free_pool_list(pools, respb->n_pools); +out_respb: + srv__check_list_pool_resp__free_unpacked(respb, &alloc.alloc); +out_dresp: + drpc_response_free(dresp); +out_req: + D_FREE(reqb); + + return rc; +} + +/* + * Register the pool information on MS via DRPC_METHOD_CHK_REG_POOL: + * if the pool does not exist, then add it on MS; otherwise, refresh + * the pool service replicas and label information. + */ +int +ds_chk_regpool_upcall(uint64_t seq, uuid_t uuid, char *label, d_rank_list_t *svcreps) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Srv__CheckRegPoolReq req = SRV__CHECK_REG_POOL_REQ__INIT; + Srv__CheckRegPoolResp *respb = NULL; + Drpc__Response *dresp = NULL; + uint8_t *reqb = NULL; + size_t size; + int rc; + + if (DAOS_FAIL_CHECK(DAOS_CHK_LEADER_FAIL_REGPOOL)) + return -DER_IO; + + req.seq = seq; + D_ASPRINTF(req.uuid, DF_UUIDF, DP_UUID(uuid)); + if (req.uuid == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + req.label = label; + req.n_svcreps = svcreps->rl_nr; + req.svcreps = svcreps->rl_ranks; + + size = srv__check_reg_pool_req__get_packed_size(&req); + D_ALLOC(reqb, size); + if (reqb == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + rc = srv__check_reg_pool_req__pack(&req, reqb); + if (rc < 0) + goto out_req; + + rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_CHK_REG_POOL, reqb, size, 0, &dresp); + if (rc != 0) + goto out_req; + + if (dresp->status != DRPC__STATUS__SUCCESS) { + D_ERROR("Received erroneous dRPC response for register pool: %d\n", 
dresp->status); + D_GOTO(out_dresp, rc = -DER_IO); + } + + respb = srv__check_reg_pool_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data); + if (alloc.oom || respb == NULL) + D_GOTO(out_dresp, rc = -DER_NOMEM); + + rc = respb->status; + srv__check_reg_pool_resp__free_unpacked(respb, &alloc.alloc); + +out_dresp: + drpc_response_free(dresp); +out_req: + D_FREE(req.uuid); + D_FREE(reqb); + + return rc; +} + +int +ds_chk_deregpool_upcall(uint64_t seq, uuid_t uuid) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Srv__CheckDeregPoolReq req = SRV__CHECK_DEREG_POOL_REQ__INIT; + Srv__CheckDeregPoolResp *respb = NULL; + Drpc__Response *dresp = NULL; + uint8_t *reqb = NULL; + size_t size; + int rc; + + req.seq = seq; + D_ASPRINTF(req.uuid, DF_UUIDF, DP_UUID(uuid)); + if (req.uuid == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + size = srv__check_dereg_pool_req__get_packed_size(&req); + D_ALLOC(reqb, size); + if (reqb == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + rc = srv__check_dereg_pool_req__pack(&req, reqb); + if (rc < 0) + goto out_req; + + rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_CHK_DEREG_POOL, reqb, size, 0, &dresp); + if (rc != 0) + goto out_req; + + if (dresp->status != DRPC__STATUS__SUCCESS) { + D_ERROR("Received erroneous dRPC response for de-register pool: %d\n", + dresp->status); + D_GOTO(out_dresp, rc = -DER_IO); + } + + respb = srv__check_dereg_pool_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data); + if (alloc.oom || respb == NULL) + D_GOTO(out_dresp, rc = -DER_NOMEM); + + rc = respb->status; + srv__check_dereg_pool_resp__free_unpacked(respb, &alloc.alloc); + +out_dresp: + drpc_response_free(dresp); +out_req: + D_FREE(req.uuid); + D_FREE(reqb); + + return rc; +} + +int +ds_chk_report_upcall(void *rpt) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Srv__CheckReportReq req = SRV__CHECK_REPORT_REQ__INIT; + Srv__CheckReportResp *respb = NULL; + Drpc__Response *dresp = NULL; + uint8_t *reqb = NULL; + 
size_t size; + int rc; + + D_ASSERT(rpt != NULL); + req.report = rpt; + + size = srv__check_report_req__get_packed_size(&req); + D_ALLOC(reqb, size); + if (reqb == NULL) + D_GOTO(out_req, rc = -DER_NOMEM); + + rc = srv__check_report_req__pack(&req, reqb); + if (rc < 0) + goto out_req; + + rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_CHK_REPORT, reqb, size, 0, &dresp); + if (rc != 0) + goto out_req; + + if (dresp->status != DRPC__STATUS__SUCCESS) { + D_ERROR("Received erroneous dRPC response for check report: %d\n", dresp->status); + D_GOTO(out_dresp, rc = -DER_IO); + } + + respb = srv__check_report_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data); + if (alloc.oom || respb == NULL) + D_GOTO(out_dresp, rc = -DER_NOMEM); + + rc = respb->status; + srv__check_report_resp__free_unpacked(respb, &alloc.alloc); + +out_dresp: + drpc_response_free(dresp); +out_req: + D_FREE(reqb); + + return rc; +} diff --git a/src/engine/init.c b/src/engine/init.c index 0f705e5366b..0111e3c1540 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -35,6 +35,7 @@ #else #define MODULE_LIST "vos,rdb,rsvc,security,mgmt,dtx,pool,cont,obj,rebuild" #endif +#define MODS_LIST_CHK "vos,rdb,rsvc,security,mgmt,dtx,pool,cont,obj,rebuild,chk" /** List of modules to load */ static char modules[MAX_MODULE_OPTIONS + 1]; @@ -91,9 +92,18 @@ unsigned int dss_storage_tiers = 2; /** Flag to indicate Arbogots is initialized */ static bool dss_abt_init; +/** Start daos_engine under check mode. 
*/ +static bool dss_check_mode; + /* stream used to dump ABT infos and ULTs stacks */ static FILE *abt_infos; +bool +engine_in_check(void) +{ + return dss_check_mode; +} + d_rank_t dss_self_rank(void) { @@ -711,28 +721,20 @@ server_init(int argc, char *argv[]) D_GOTO(exit_mod_init, rc); D_INFO("Network successfully initialized\n"); - if (dss_mod_facs & DSS_FAC_LOAD_CLI) { - rc = daos_init(); - if (rc) { - D_ERROR("daos_init (client) failed, rc: "DF_RC"\n", - DP_RC(rc)); - D_GOTO(exit_crt, rc); - } - D_INFO("Client stack enabled\n"); - } else { - rc = daos_hhash_init(); - if (rc) { - D_ERROR("daos_hhash_init failed, rc: "DF_RC"\n", - DP_RC(rc)); - D_GOTO(exit_crt, rc); - } - rc = pl_init(); - if (rc != 0) { - daos_hhash_fini(); - goto exit_crt; - } - D_INFO("handle hash table and placement initialized\n"); + rc = daos_hhash_init(); + if (rc != 0) { + D_ERROR("daos_hhash_init failed, rc: "DF_RC"\n", + DP_RC(rc)); + D_GOTO(exit_crt, rc); + } + + rc = pl_init(); + if (rc != 0) { + daos_hhash_fini(); + goto exit_crt; } + D_INFO("handle hash table and placement initialized\n"); + /* server-side uses D_HTYPE_PTR handle */ d_hhash_set_ptrtype(daos_ht.dht_hhash); @@ -781,7 +783,7 @@ server_init(int argc, char *argv[]) goto exit_srv_init; } - rc = drpc_notify_ready(); + rc = drpc_notify_ready(dss_check_mode); if (rc != 0) { D_ERROR("Failed to notify daos_server: "DF_RC"\n", DP_RC(rc)); goto exit_init_state; @@ -789,9 +791,11 @@ server_init(int argc, char *argv[]) server_init_state_wait(DSS_INIT_STATE_SET_UP); - rc = crt_register_event_cb(dss_crt_event_cb, NULL); - if (rc) - D_GOTO(exit_init_state, rc); + if (!dss_check_mode) { + rc = crt_register_event_cb(dss_crt_event_cb, NULL); + if (rc != 0) + D_GOTO(exit_init_state, rc); + } rc = crt_register_hlc_error_cb(dss_crt_hlc_error_cb, NULL); if (rc) @@ -825,12 +829,8 @@ server_init(int argc, char *argv[]) exit_mod_loaded: ds_iv_fini(); dss_module_unload_all(); - if (dss_mod_facs & DSS_FAC_LOAD_CLI) { - daos_fini(); - } else { - 
pl_fini(); - daos_hhash_fini(); - } + pl_fini(); + daos_hhash_fini(); exit_crt: crt_finalize(); exit_mod_init: @@ -858,7 +858,8 @@ server_fini(bool force) * xstreams won't start shutting down until we call dss_srv_fini below. */ dss_srv_set_shutting_down(); - crt_unregister_event_cb(dss_crt_event_cb, NULL); + if (!dss_check_mode) + crt_unregister_event_cb(dss_crt_event_cb, NULL); D_INFO("unregister event callbacks done\n"); /* * Cleaning up modules needs to create ULTs on other xstreams; must be @@ -884,12 +885,8 @@ server_fini(bool force) * Client stuff finalization needs be done after all ULTs drained * in dss_srv_fini(). */ - if (dss_mod_facs & DSS_FAC_LOAD_CLI) { - daos_fini(); - } else { - pl_fini(); - daos_hhash_fini(); - } + pl_fini(); + daos_hhash_fini(); D_INFO("daos_fini() or pl_fini() done\n"); crt_finalize(); D_INFO("crt_finalize() done\n"); @@ -945,6 +942,8 @@ Options:\n\ Passes the configured hugepage size(2MB or 1GB)\n\ --storage_tiers=ntiers, -T ntiers\n\ Number of storage tiers\n\ + --check, -C\n\ + Start engine with check mode, global consistency check\n\ --help, -h\n\ Print this description\n", prog, prog, modules, daos_sysname, dss_storage_path, @@ -984,22 +983,32 @@ parse(int argc, char **argv) { "instance_idx", required_argument, NULL, 'I' }, { "bypass_health_chk", no_argument, NULL, 'b' }, { "storage_tiers", required_argument, NULL, 'T' }, + { "check", no_argument, NULL, 'C' }, { NULL, 0, NULL, 0} }; int rc = 0; int c; + bool spec_mod = false; + + dss_check_mode = false; /* load all of modules by default */ sprintf(modules, "%s", MODULE_LIST); - while ((c = getopt_long(argc, argv, "c:d:f:g:hi:m:n:p:r:H:t:s:x:I:bT:", + while ((c = getopt_long(argc, argv, "c:d:f:g:hi:m:n:p:r:H:t:s:x:I:bT:C", opts, NULL)) != -1) { switch (c) { case 'm': + if (dss_check_mode) { + printf("'-c|--modules' option is ignored under check mode\n"); + break; + } + if (strlen(optarg) > MAX_MODULE_OPTIONS) { rc = -DER_INVAL; usage(argv[0], stderr); break; } + spec_mod = 
true; snprintf(modules, sizeof(modules), "%s", optarg); break; case 'c': @@ -1060,6 +1069,14 @@ parse(int argc, char **argv) rc = -DER_INVAL; } break; + case 'C': + dss_check_mode = true; + if (spec_mod) { + printf("'-c|--modules' option is ignored under check mode\n"); + spec_mod = false; + } + snprintf(modules, sizeof(modules), "%s", MODS_LIST_CHK); + break; default: usage(argv[0], stderr); rc = -DER_INVAL; diff --git a/src/engine/sched.c b/src/engine/sched.c index 04caea0f1a4..763bc0b64be 100644 --- a/src/engine/sched.c +++ b/src/engine/sched.c @@ -1496,9 +1496,8 @@ sched_req_sleep(struct sched_request *req, uint32_t msecs) static void req_wakeup_internal(struct dss_xstream *dx, struct sched_request *req) { - D_ASSERT(req != NULL); /* The request is not in sleep */ - if (req->sr_wakeup_time == 0) + if (req == NULL || req->sr_wakeup_time == 0) return; D_ASSERT(req->sr_in_heap == 0); diff --git a/src/engine/server_iv.c b/src/engine/server_iv.c index 56ed49f9bcf..9086130a1a1 100644 --- a/src/engine/server_iv.c +++ b/src/engine/server_iv.c @@ -162,7 +162,7 @@ iv_key_unpack(struct ds_iv_key *key_iv, crt_iv_key_t *key_iov) return rc; } -static void +void ds_iv_ns_get(struct ds_iv_ns *ns) { ns->iv_refcount++; @@ -865,11 +865,20 @@ ds_iv_ns_leader_stop(struct ds_iv_ns *ns) } void -ds_iv_ns_stop(struct ds_iv_ns *ns) +ds_iv_ns_cleanup(struct ds_iv_ns *ns) { struct ds_iv_entry *entry; struct ds_iv_entry *tmp; + d_list_for_each_entry_safe(entry, tmp, &ns->iv_entry_list, iv_link) { + d_list_del(&entry->iv_link); + iv_entry_free(entry); + } +} + +void +ds_iv_ns_stop(struct ds_iv_ns *ns) +{ ns->iv_stop = 1; ds_iv_ns_put(ns); ABT_mutex_lock(ns->iv_mutex); /* only for ABT_cond_wait; unnecessary otherwise */ @@ -884,10 +893,7 @@ ds_iv_ns_stop(struct ds_iv_ns *ns) ABT_mutex_unlock(ns->iv_mutex); D_DEBUG(DB_MGMT, DF_UUID " ns stopped\n", DP_UUID(ns->iv_pool_uuid)); - d_list_for_each_entry_safe(entry, tmp, &ns->iv_entry_list, iv_link) { - d_list_del(&entry->iv_link); - 
iv_entry_free(entry); - } + ds_iv_ns_cleanup(ns); D_INFO(DF_UUID" ns stopped\n", DP_UUID(ns->iv_pool_uuid)); } @@ -1062,6 +1068,16 @@ _iv_op(struct ds_iv_ns *ns, struct ds_iv_key *key, d_sg_list_t *value, rc = iv_op_internal(ns, key, value, sync, shortcut, opc); if (retry && !ns->iv_stop && (daos_rpc_retryable_rc(rc) || rc == -DER_NOTLEADER || rc == -DER_BUSY)) { + if (rc == -DER_GRPVER && engine_in_check()) { + /* + * Under check mode, the pool shard on peer rank/target does + * not exist, then it will reply "-DER_GRPVER" that is normal + * for check. Return the errno to the caller instead of retry. + */ + D_WARN("IV for DAOS check hit unmatched GRP version %d\n", rc); + return rc; + } + if (rc == -DER_NOTLEADER && key->rank != (d_rank_t)(-1) && sync && (sync->ivs_mode == CRT_IV_SYNC_LAZY || sync->ivs_mode == CRT_IV_SYNC_EAGER)) { diff --git a/src/engine/srv.pb-c.c b/src/engine/srv.pb-c.c index 8b7845b3dce..50c5845a5cb 100644 --- a/src/engine/srv.pb-c.c +++ b/src/engine/srv.pb-c.c @@ -232,7 +232,373 @@ void srv__pool_find_by_label_resp__free_unpacked assert(message->base.descriptor == &srv__pool_find_by_label_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[8] = +void srv__check_list_pool_req__init + (Srv__CheckListPoolReq *message) +{ + static const Srv__CheckListPoolReq init_value = SRV__CHECK_LIST_POOL_REQ__INIT; + *message = init_value; +} +size_t srv__check_list_pool_req__get_packed_size + (const Srv__CheckListPoolReq *message) +{ + assert(message->base.descriptor == &srv__check_list_pool_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_list_pool_req__pack + (const Srv__CheckListPoolReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_list_pool_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); 
+} +size_t srv__check_list_pool_req__pack_to_buffer + (const Srv__CheckListPoolReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_list_pool_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckListPoolReq * + srv__check_list_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckListPoolReq *) + protobuf_c_message_unpack (&srv__check_list_pool_req__descriptor, + allocator, len, data); +} +void srv__check_list_pool_req__free_unpacked + (Srv__CheckListPoolReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_list_pool_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_list_pool_resp__one_pool__init + (Srv__CheckListPoolResp__OnePool *message) +{ + static const Srv__CheckListPoolResp__OnePool init_value = SRV__CHECK_LIST_POOL_RESP__ONE_POOL__INIT; + *message = init_value; +} +void srv__check_list_pool_resp__init + (Srv__CheckListPoolResp *message) +{ + static const Srv__CheckListPoolResp init_value = SRV__CHECK_LIST_POOL_RESP__INIT; + *message = init_value; +} +size_t srv__check_list_pool_resp__get_packed_size + (const Srv__CheckListPoolResp *message) +{ + assert(message->base.descriptor == &srv__check_list_pool_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_list_pool_resp__pack + (const Srv__CheckListPoolResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_list_pool_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_list_pool_resp__pack_to_buffer + (const Srv__CheckListPoolResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_list_pool_resp__descriptor); + return 
protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckListPoolResp * + srv__check_list_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckListPoolResp *) + protobuf_c_message_unpack (&srv__check_list_pool_resp__descriptor, + allocator, len, data); +} +void srv__check_list_pool_resp__free_unpacked + (Srv__CheckListPoolResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_list_pool_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_reg_pool_req__init + (Srv__CheckRegPoolReq *message) +{ + static const Srv__CheckRegPoolReq init_value = SRV__CHECK_REG_POOL_REQ__INIT; + *message = init_value; +} +size_t srv__check_reg_pool_req__get_packed_size + (const Srv__CheckRegPoolReq *message) +{ + assert(message->base.descriptor == &srv__check_reg_pool_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_reg_pool_req__pack + (const Srv__CheckRegPoolReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_reg_pool_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_reg_pool_req__pack_to_buffer + (const Srv__CheckRegPoolReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_reg_pool_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckRegPoolReq * + srv__check_reg_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckRegPoolReq *) + protobuf_c_message_unpack (&srv__check_reg_pool_req__descriptor, + allocator, len, data); +} +void srv__check_reg_pool_req__free_unpacked + (Srv__CheckRegPoolReq *message, + ProtobufCAllocator *allocator) +{ + 
if(!message) + return; + assert(message->base.descriptor == &srv__check_reg_pool_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_reg_pool_resp__init + (Srv__CheckRegPoolResp *message) +{ + static const Srv__CheckRegPoolResp init_value = SRV__CHECK_REG_POOL_RESP__INIT; + *message = init_value; +} +size_t srv__check_reg_pool_resp__get_packed_size + (const Srv__CheckRegPoolResp *message) +{ + assert(message->base.descriptor == &srv__check_reg_pool_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_reg_pool_resp__pack + (const Srv__CheckRegPoolResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_reg_pool_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_reg_pool_resp__pack_to_buffer + (const Srv__CheckRegPoolResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_reg_pool_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckRegPoolResp * + srv__check_reg_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckRegPoolResp *) + protobuf_c_message_unpack (&srv__check_reg_pool_resp__descriptor, + allocator, len, data); +} +void srv__check_reg_pool_resp__free_unpacked + (Srv__CheckRegPoolResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_reg_pool_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_dereg_pool_req__init + (Srv__CheckDeregPoolReq *message) +{ + static const Srv__CheckDeregPoolReq init_value = SRV__CHECK_DEREG_POOL_REQ__INIT; + *message = init_value; +} +size_t srv__check_dereg_pool_req__get_packed_size + (const Srv__CheckDeregPoolReq 
*message) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_dereg_pool_req__pack + (const Srv__CheckDeregPoolReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_dereg_pool_req__pack_to_buffer + (const Srv__CheckDeregPoolReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckDeregPoolReq * + srv__check_dereg_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckDeregPoolReq *) + protobuf_c_message_unpack (&srv__check_dereg_pool_req__descriptor, + allocator, len, data); +} +void srv__check_dereg_pool_req__free_unpacked + (Srv__CheckDeregPoolReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_dereg_pool_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_dereg_pool_resp__init + (Srv__CheckDeregPoolResp *message) +{ + static const Srv__CheckDeregPoolResp init_value = SRV__CHECK_DEREG_POOL_RESP__INIT; + *message = init_value; +} +size_t srv__check_dereg_pool_resp__get_packed_size + (const Srv__CheckDeregPoolResp *message) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_dereg_pool_resp__pack + (const Srv__CheckDeregPoolResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, 
out); +} +size_t srv__check_dereg_pool_resp__pack_to_buffer + (const Srv__CheckDeregPoolResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_dereg_pool_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckDeregPoolResp * + srv__check_dereg_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckDeregPoolResp *) + protobuf_c_message_unpack (&srv__check_dereg_pool_resp__descriptor, + allocator, len, data); +} +void srv__check_dereg_pool_resp__free_unpacked + (Srv__CheckDeregPoolResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_dereg_pool_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_report_req__init + (Srv__CheckReportReq *message) +{ + static const Srv__CheckReportReq init_value = SRV__CHECK_REPORT_REQ__INIT; + *message = init_value; +} +size_t srv__check_report_req__get_packed_size + (const Srv__CheckReportReq *message) +{ + assert(message->base.descriptor == &srv__check_report_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_report_req__pack + (const Srv__CheckReportReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_report_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_report_req__pack_to_buffer + (const Srv__CheckReportReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_report_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckReportReq * + srv__check_report_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckReportReq *) + 
protobuf_c_message_unpack (&srv__check_report_req__descriptor, + allocator, len, data); +} +void srv__check_report_req__free_unpacked + (Srv__CheckReportReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_report_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void srv__check_report_resp__init + (Srv__CheckReportResp *message) +{ + static const Srv__CheckReportResp init_value = SRV__CHECK_REPORT_RESP__INIT; + *message = init_value; +} +size_t srv__check_report_resp__get_packed_size + (const Srv__CheckReportResp *message) +{ + assert(message->base.descriptor == &srv__check_report_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t srv__check_report_resp__pack + (const Srv__CheckReportResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &srv__check_report_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t srv__check_report_resp__pack_to_buffer + (const Srv__CheckReportResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &srv__check_report_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Srv__CheckReportResp * + srv__check_report_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Srv__CheckReportResp *) + protobuf_c_message_unpack (&srv__check_report_resp__descriptor, + allocator, len, data); +} +void srv__check_report_resp__free_unpacked + (Srv__CheckReportResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &srv__check_report_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[9] = { { "uri", @@ -330,8 
+696,21 @@ static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[8 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "check_mode", + 9, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Srv__NotifyReadyReq, check_mode), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned srv__notify_ready_req__field_indices_by_name[] = { + 8, /* field[8] = check_mode */ 2, /* field[2] = drpcListenerSock */ 5, /* field[5] = incarnation */ 3, /* field[3] = instanceIdx */ @@ -344,7 +723,7 @@ static const unsigned srv__notify_ready_req__field_indices_by_name[] = { static const ProtobufCIntRange srv__notify_ready_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 8 } + { 0, 9 } }; const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = { @@ -354,7 +733,7 @@ const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = "Srv__NotifyReadyReq", "srv", sizeof(Srv__NotifyReadyReq), - 8, + 9, srv__notify_ready_req__field_descriptors, srv__notify_ready_req__field_indices_by_name, 1, srv__notify_ready_req__number_ranges, @@ -552,3 +931,416 @@ const ProtobufCMessageDescriptor srv__pool_find_by_label_resp__descriptor = (ProtobufCMessageInit) srv__pool_find_by_label_resp__init, NULL,NULL,NULL /* reserved[123] */ }; +#define srv__check_list_pool_req__field_descriptors NULL +#define srv__check_list_pool_req__field_indices_by_name NULL +#define srv__check_list_pool_req__number_ranges NULL +const ProtobufCMessageDescriptor srv__check_list_pool_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckListPoolReq", + "CheckListPoolReq", + "Srv__CheckListPoolReq", + "srv", + sizeof(Srv__CheckListPoolReq), + 0, + srv__check_list_pool_req__field_descriptors, + srv__check_list_pool_req__field_indices_by_name, + 0, srv__check_list_pool_req__number_ranges, + (ProtobufCMessageInit) srv__check_list_pool_req__init, + NULL,NULL,NULL /* 
reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_list_pool_resp__one_pool__field_descriptors[3] = +{ + { + "uuid", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Srv__CheckListPoolResp__OnePool, uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "label", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Srv__CheckListPoolResp__OnePool, label), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "svcreps", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Srv__CheckListPoolResp__OnePool, n_svcreps), + offsetof(Srv__CheckListPoolResp__OnePool, svcreps), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_list_pool_resp__one_pool__field_indices_by_name[] = { + 1, /* field[1] = label */ + 2, /* field[2] = svcreps */ + 0, /* field[0] = uuid */ +}; +static const ProtobufCIntRange srv__check_list_pool_resp__one_pool__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const ProtobufCMessageDescriptor srv__check_list_pool_resp__one_pool__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckListPoolResp.OnePool", + "OnePool", + "Srv__CheckListPoolResp__OnePool", + "srv", + sizeof(Srv__CheckListPoolResp__OnePool), + 3, + srv__check_list_pool_resp__one_pool__field_descriptors, + srv__check_list_pool_resp__one_pool__field_indices_by_name, + 1, srv__check_list_pool_resp__one_pool__number_ranges, + (ProtobufCMessageInit) srv__check_list_pool_resp__one_pool__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_list_pool_resp__field_descriptors[2] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Srv__CheckListPoolResp, status), + 
NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "pools", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Srv__CheckListPoolResp, n_pools), + offsetof(Srv__CheckListPoolResp, pools), + &srv__check_list_pool_resp__one_pool__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_list_pool_resp__field_indices_by_name[] = { + 1, /* field[1] = pools */ + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange srv__check_list_pool_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor srv__check_list_pool_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckListPoolResp", + "CheckListPoolResp", + "Srv__CheckListPoolResp", + "srv", + sizeof(Srv__CheckListPoolResp), + 2, + srv__check_list_pool_resp__field_descriptors, + srv__check_list_pool_resp__field_indices_by_name, + 1, srv__check_list_pool_resp__number_ranges, + (ProtobufCMessageInit) srv__check_list_pool_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_reg_pool_req__field_descriptors[4] = +{ + { + "seq", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Srv__CheckRegPoolReq, seq), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "uuid", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Srv__CheckRegPoolReq, uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "label", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Srv__CheckRegPoolReq, label), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "svcreps", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + 
offsetof(Srv__CheckRegPoolReq, n_svcreps), + offsetof(Srv__CheckRegPoolReq, svcreps), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_reg_pool_req__field_indices_by_name[] = { + 2, /* field[2] = label */ + 0, /* field[0] = seq */ + 3, /* field[3] = svcreps */ + 1, /* field[1] = uuid */ +}; +static const ProtobufCIntRange srv__check_reg_pool_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor srv__check_reg_pool_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckRegPoolReq", + "CheckRegPoolReq", + "Srv__CheckRegPoolReq", + "srv", + sizeof(Srv__CheckRegPoolReq), + 4, + srv__check_reg_pool_req__field_descriptors, + srv__check_reg_pool_req__field_indices_by_name, + 1, srv__check_reg_pool_req__number_ranges, + (ProtobufCMessageInit) srv__check_reg_pool_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_reg_pool_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Srv__CheckRegPoolResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_reg_pool_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange srv__check_reg_pool_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor srv__check_reg_pool_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckRegPoolResp", + "CheckRegPoolResp", + "Srv__CheckRegPoolResp", + "srv", + sizeof(Srv__CheckRegPoolResp), + 1, + srv__check_reg_pool_resp__field_descriptors, + srv__check_reg_pool_resp__field_indices_by_name, + 1, srv__check_reg_pool_resp__number_ranges, + (ProtobufCMessageInit) srv__check_reg_pool_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const 
ProtobufCFieldDescriptor srv__check_dereg_pool_req__field_descriptors[2] = +{ + { + "seq", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Srv__CheckDeregPoolReq, seq), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "uuid", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Srv__CheckDeregPoolReq, uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_dereg_pool_req__field_indices_by_name[] = { + 0, /* field[0] = seq */ + 1, /* field[1] = uuid */ +}; +static const ProtobufCIntRange srv__check_dereg_pool_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor srv__check_dereg_pool_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckDeregPoolReq", + "CheckDeregPoolReq", + "Srv__CheckDeregPoolReq", + "srv", + sizeof(Srv__CheckDeregPoolReq), + 2, + srv__check_dereg_pool_req__field_descriptors, + srv__check_dereg_pool_req__field_indices_by_name, + 1, srv__check_dereg_pool_req__number_ranges, + (ProtobufCMessageInit) srv__check_dereg_pool_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_dereg_pool_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Srv__CheckDeregPoolResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_dereg_pool_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange srv__check_dereg_pool_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor srv__check_dereg_pool_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckDeregPoolResp", + 
"CheckDeregPoolResp", + "Srv__CheckDeregPoolResp", + "srv", + sizeof(Srv__CheckDeregPoolResp), + 1, + srv__check_dereg_pool_resp__field_descriptors, + srv__check_dereg_pool_resp__field_indices_by_name, + 1, srv__check_dereg_pool_resp__number_ranges, + (ProtobufCMessageInit) srv__check_dereg_pool_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_report_req__field_descriptors[1] = +{ + { + "report", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Srv__CheckReportReq, report), + &chk__check_report__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_report_req__field_indices_by_name[] = { + 0, /* field[0] = report */ +}; +static const ProtobufCIntRange srv__check_report_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor srv__check_report_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckReportReq", + "CheckReportReq", + "Srv__CheckReportReq", + "srv", + sizeof(Srv__CheckReportReq), + 1, + srv__check_report_req__field_descriptors, + srv__check_report_req__field_indices_by_name, + 1, srv__check_report_req__number_ranges, + (ProtobufCMessageInit) srv__check_report_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor srv__check_report_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Srv__CheckReportResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned srv__check_report_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange srv__check_report_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor srv__check_report_resp__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "srv.CheckReportResp", + "CheckReportResp", + "Srv__CheckReportResp", + "srv", + sizeof(Srv__CheckReportResp), + 1, + srv__check_report_resp__field_descriptors, + srv__check_report_resp__field_indices_by_name, + 1, srv__check_report_resp__number_ranges, + (ProtobufCMessageInit) srv__check_report_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; diff --git a/src/engine/srv.pb-c.h b/src/engine/srv.pb-c.h index 2f79787a375..a8518700f7b 100644 --- a/src/engine/srv.pb-c.h +++ b/src/engine/srv.pb-c.h @@ -14,12 +14,22 @@ PROTOBUF_C__BEGIN_DECLS # error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. #endif +#include "chk/chk.pb-c.h" typedef struct _Srv__NotifyReadyReq Srv__NotifyReadyReq; typedef struct _Srv__GetPoolSvcReq Srv__GetPoolSvcReq; typedef struct _Srv__GetPoolSvcResp Srv__GetPoolSvcResp; typedef struct _Srv__PoolFindByLabelReq Srv__PoolFindByLabelReq; typedef struct _Srv__PoolFindByLabelResp Srv__PoolFindByLabelResp; +typedef struct _Srv__CheckListPoolReq Srv__CheckListPoolReq; +typedef struct _Srv__CheckListPoolResp Srv__CheckListPoolResp; +typedef struct _Srv__CheckListPoolResp__OnePool Srv__CheckListPoolResp__OnePool; +typedef struct _Srv__CheckRegPoolReq Srv__CheckRegPoolReq; +typedef struct _Srv__CheckRegPoolResp Srv__CheckRegPoolResp; +typedef struct _Srv__CheckDeregPoolReq Srv__CheckDeregPoolReq; +typedef struct _Srv__CheckDeregPoolResp Srv__CheckDeregPoolResp; +typedef struct _Srv__CheckReportReq Srv__CheckReportReq; +typedef struct _Srv__CheckReportResp Srv__CheckReportResp; /* --- enums --- */ @@ -64,10 +74,14 @@ struct _Srv__NotifyReadyReq */ size_t n_secondarynctxs; uint32_t *secondarynctxs; + /* + * True if engine started in checker mode + */ + protobuf_c_boolean check_mode; }; #define SRV__NOTIFY_READY_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&srv__notify_ready_req__descriptor) \ - , 
(char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL, 0,NULL, 0 } struct _Srv__GetPoolSvcReq @@ -136,6 +150,159 @@ struct _Srv__PoolFindByLabelResp , 0, (char *)protobuf_c_empty_string, 0,NULL } +/* + * List all the known pools from MS. + */ +struct _Srv__CheckListPoolReq +{ + ProtobufCMessage base; +}; +#define SRV__CHECK_LIST_POOL_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_list_pool_req__descriptor) \ + } + + +struct _Srv__CheckListPoolResp__OnePool +{ + ProtobufCMessage base; + /* + * Pool UUID. + */ + char *uuid; + /* + * Pool label. + */ + char *label; + /* + * Pool service replica ranks. + */ + size_t n_svcreps; + uint32_t *svcreps; +}; +#define SRV__CHECK_LIST_POOL_RESP__ONE_POOL__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_list_pool_resp__one_pool__descriptor) \ + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL } + + +struct _Srv__CheckListPoolResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t status; + /* + * The list of pools. + */ + size_t n_pools; + Srv__CheckListPoolResp__OnePool **pools; +}; +#define SRV__CHECK_LIST_POOL_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_list_pool_resp__descriptor) \ + , 0, 0,NULL } + + +/* + * Register pool to MS. + */ +struct _Srv__CheckRegPoolReq +{ + ProtobufCMessage base; + /* + * DAOS Check event sequence, unique for the instance. + */ + uint64_t seq; + /* + * Pool UUID. + */ + char *uuid; + /* + * Pool label. + */ + char *label; + /* + * Pool service replica ranks. + */ + size_t n_svcreps; + uint32_t *svcreps; +}; +#define SRV__CHECK_REG_POOL_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_reg_pool_req__descriptor) \ + , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL } + + +struct _Srv__CheckRegPoolResp +{ + ProtobufCMessage base; + /* + * DAOS error code. 
+ */ + int32_t status; +}; +#define SRV__CHECK_REG_POOL_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_reg_pool_resp__descriptor) \ + , 0 } + + +/* + * Deregister pool from MS. + */ +struct _Srv__CheckDeregPoolReq +{ + ProtobufCMessage base; + /* + * DAOS Check event sequence, unique for the instance. + */ + uint64_t seq; + /* + * The pool to be deregistered. + */ + char *uuid; +}; +#define SRV__CHECK_DEREG_POOL_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_dereg_pool_req__descriptor) \ + , 0, (char *)protobuf_c_empty_string } + + +struct _Srv__CheckDeregPoolResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t status; +}; +#define SRV__CHECK_DEREG_POOL_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_dereg_pool_resp__descriptor) \ + , 0 } + + +struct _Srv__CheckReportReq +{ + ProtobufCMessage base; + /* + * Report payload + */ + Chk__CheckReport *report; +}; +#define SRV__CHECK_REPORT_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_report_req__descriptor) \ + , NULL } + + +struct _Srv__CheckReportResp +{ + ProtobufCMessage base; + /* + * DAOS error code. 
+ */ + int32_t status; +}; +#define SRV__CHECK_REPORT_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&srv__check_report_resp__descriptor) \ + , 0 } + + /* Srv__NotifyReadyReq methods */ void srv__notify_ready_req__init (Srv__NotifyReadyReq *message); @@ -231,6 +398,161 @@ Srv__PoolFindByLabelResp * void srv__pool_find_by_label_resp__free_unpacked (Srv__PoolFindByLabelResp *message, ProtobufCAllocator *allocator); +/* Srv__CheckListPoolReq methods */ +void srv__check_list_pool_req__init + (Srv__CheckListPoolReq *message); +size_t srv__check_list_pool_req__get_packed_size + (const Srv__CheckListPoolReq *message); +size_t srv__check_list_pool_req__pack + (const Srv__CheckListPoolReq *message, + uint8_t *out); +size_t srv__check_list_pool_req__pack_to_buffer + (const Srv__CheckListPoolReq *message, + ProtobufCBuffer *buffer); +Srv__CheckListPoolReq * + srv__check_list_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_list_pool_req__free_unpacked + (Srv__CheckListPoolReq *message, + ProtobufCAllocator *allocator); +/* Srv__CheckListPoolResp__OnePool methods */ +void srv__check_list_pool_resp__one_pool__init + (Srv__CheckListPoolResp__OnePool *message); +/* Srv__CheckListPoolResp methods */ +void srv__check_list_pool_resp__init + (Srv__CheckListPoolResp *message); +size_t srv__check_list_pool_resp__get_packed_size + (const Srv__CheckListPoolResp *message); +size_t srv__check_list_pool_resp__pack + (const Srv__CheckListPoolResp *message, + uint8_t *out); +size_t srv__check_list_pool_resp__pack_to_buffer + (const Srv__CheckListPoolResp *message, + ProtobufCBuffer *buffer); +Srv__CheckListPoolResp * + srv__check_list_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_list_pool_resp__free_unpacked + (Srv__CheckListPoolResp *message, + ProtobufCAllocator *allocator); +/* Srv__CheckRegPoolReq methods */ +void srv__check_reg_pool_req__init + (Srv__CheckRegPoolReq 
*message); +size_t srv__check_reg_pool_req__get_packed_size + (const Srv__CheckRegPoolReq *message); +size_t srv__check_reg_pool_req__pack + (const Srv__CheckRegPoolReq *message, + uint8_t *out); +size_t srv__check_reg_pool_req__pack_to_buffer + (const Srv__CheckRegPoolReq *message, + ProtobufCBuffer *buffer); +Srv__CheckRegPoolReq * + srv__check_reg_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_reg_pool_req__free_unpacked + (Srv__CheckRegPoolReq *message, + ProtobufCAllocator *allocator); +/* Srv__CheckRegPoolResp methods */ +void srv__check_reg_pool_resp__init + (Srv__CheckRegPoolResp *message); +size_t srv__check_reg_pool_resp__get_packed_size + (const Srv__CheckRegPoolResp *message); +size_t srv__check_reg_pool_resp__pack + (const Srv__CheckRegPoolResp *message, + uint8_t *out); +size_t srv__check_reg_pool_resp__pack_to_buffer + (const Srv__CheckRegPoolResp *message, + ProtobufCBuffer *buffer); +Srv__CheckRegPoolResp * + srv__check_reg_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_reg_pool_resp__free_unpacked + (Srv__CheckRegPoolResp *message, + ProtobufCAllocator *allocator); +/* Srv__CheckDeregPoolReq methods */ +void srv__check_dereg_pool_req__init + (Srv__CheckDeregPoolReq *message); +size_t srv__check_dereg_pool_req__get_packed_size + (const Srv__CheckDeregPoolReq *message); +size_t srv__check_dereg_pool_req__pack + (const Srv__CheckDeregPoolReq *message, + uint8_t *out); +size_t srv__check_dereg_pool_req__pack_to_buffer + (const Srv__CheckDeregPoolReq *message, + ProtobufCBuffer *buffer); +Srv__CheckDeregPoolReq * + srv__check_dereg_pool_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_dereg_pool_req__free_unpacked + (Srv__CheckDeregPoolReq *message, + ProtobufCAllocator *allocator); +/* Srv__CheckDeregPoolResp methods */ +void srv__check_dereg_pool_resp__init + 
(Srv__CheckDeregPoolResp *message); +size_t srv__check_dereg_pool_resp__get_packed_size + (const Srv__CheckDeregPoolResp *message); +size_t srv__check_dereg_pool_resp__pack + (const Srv__CheckDeregPoolResp *message, + uint8_t *out); +size_t srv__check_dereg_pool_resp__pack_to_buffer + (const Srv__CheckDeregPoolResp *message, + ProtobufCBuffer *buffer); +Srv__CheckDeregPoolResp * + srv__check_dereg_pool_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_dereg_pool_resp__free_unpacked + (Srv__CheckDeregPoolResp *message, + ProtobufCAllocator *allocator); +/* Srv__CheckReportReq methods */ +void srv__check_report_req__init + (Srv__CheckReportReq *message); +size_t srv__check_report_req__get_packed_size + (const Srv__CheckReportReq *message); +size_t srv__check_report_req__pack + (const Srv__CheckReportReq *message, + uint8_t *out); +size_t srv__check_report_req__pack_to_buffer + (const Srv__CheckReportReq *message, + ProtobufCBuffer *buffer); +Srv__CheckReportReq * + srv__check_report_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_report_req__free_unpacked + (Srv__CheckReportReq *message, + ProtobufCAllocator *allocator); +/* Srv__CheckReportResp methods */ +void srv__check_report_resp__init + (Srv__CheckReportResp *message); +size_t srv__check_report_resp__get_packed_size + (const Srv__CheckReportResp *message); +size_t srv__check_report_resp__pack + (const Srv__CheckReportResp *message, + uint8_t *out); +size_t srv__check_report_resp__pack_to_buffer + (const Srv__CheckReportResp *message, + ProtobufCBuffer *buffer); +Srv__CheckReportResp * + srv__check_report_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void srv__check_report_resp__free_unpacked + (Srv__CheckReportResp *message, + ProtobufCAllocator *allocator); /* --- per-message closures --- */ typedef void (*Srv__NotifyReadyReq_Closure) @@ -248,6 +570,33 @@ typedef void 
(*Srv__PoolFindByLabelReq_Closure) typedef void (*Srv__PoolFindByLabelResp_Closure) (const Srv__PoolFindByLabelResp *message, void *closure_data); +typedef void (*Srv__CheckListPoolReq_Closure) + (const Srv__CheckListPoolReq *message, + void *closure_data); +typedef void (*Srv__CheckListPoolResp__OnePool_Closure) + (const Srv__CheckListPoolResp__OnePool *message, + void *closure_data); +typedef void (*Srv__CheckListPoolResp_Closure) + (const Srv__CheckListPoolResp *message, + void *closure_data); +typedef void (*Srv__CheckRegPoolReq_Closure) + (const Srv__CheckRegPoolReq *message, + void *closure_data); +typedef void (*Srv__CheckRegPoolResp_Closure) + (const Srv__CheckRegPoolResp *message, + void *closure_data); +typedef void (*Srv__CheckDeregPoolReq_Closure) + (const Srv__CheckDeregPoolReq *message, + void *closure_data); +typedef void (*Srv__CheckDeregPoolResp_Closure) + (const Srv__CheckDeregPoolResp *message, + void *closure_data); +typedef void (*Srv__CheckReportReq_Closure) + (const Srv__CheckReportReq *message, + void *closure_data); +typedef void (*Srv__CheckReportResp_Closure) + (const Srv__CheckReportResp *message, + void *closure_data); /* --- services --- */ @@ -259,6 +608,15 @@ extern const ProtobufCMessageDescriptor srv__get_pool_svc_req__descriptor; extern const ProtobufCMessageDescriptor srv__get_pool_svc_resp__descriptor; extern const ProtobufCMessageDescriptor srv__pool_find_by_label_req__descriptor; extern const ProtobufCMessageDescriptor srv__pool_find_by_label_resp__descriptor; +extern const ProtobufCMessageDescriptor srv__check_list_pool_req__descriptor; +extern const ProtobufCMessageDescriptor srv__check_list_pool_resp__descriptor; +extern const ProtobufCMessageDescriptor srv__check_list_pool_resp__one_pool__descriptor; +extern const ProtobufCMessageDescriptor srv__check_reg_pool_req__descriptor; +extern const ProtobufCMessageDescriptor srv__check_reg_pool_resp__descriptor; +extern const ProtobufCMessageDescriptor 
srv__check_dereg_pool_req__descriptor; +extern const ProtobufCMessageDescriptor srv__check_dereg_pool_resp__descriptor; +extern const ProtobufCMessageDescriptor srv__check_report_req__descriptor; +extern const ProtobufCMessageDescriptor srv__check_report_resp__descriptor; PROTOBUF_C__END_DECLS diff --git a/src/engine/tests/SConscript b/src/engine/tests/SConscript index 4c9fa8a7dea..b5560a71b96 100644 --- a/src/engine/tests/SConscript +++ b/src/engine/tests/SConscript @@ -29,7 +29,8 @@ def scons(): Depends('drpc_client_tests', common_mock_ld_script) unit_env.d_test_program('drpc_client_tests', ['drpc_client_tests.c', drpc_test_utils, '../drpc_client.c', - '../drpc_ras.c', '../srv.pb-c.c', '../event.pb-c.c'], + '../drpc_ras.c', '../srv.pb-c.c', '../event.pb-c.c', + '../../chk/chk.pb-c.c'], LIBS=['daos_common', 'protobuf-c', 'gurt', 'cmocka', 'uuid', 'pthread', 'abt', 'cart']) diff --git a/src/engine/tests/drpc_client_tests.c b/src/engine/tests/drpc_client_tests.c index c53d1b4725d..23fd37b410f 100644 --- a/src/engine/tests/drpc_client_tests.c +++ b/src/engine/tests/drpc_client_tests.c @@ -229,7 +229,7 @@ test_drpc_verify_notify_ready(void **state) mock_valid_drpc_resp_in_recvmsg(DRPC__STATUS__SUCCESS); - assert_rc_equal(drpc_notify_ready(), 0); + assert_rc_equal(drpc_notify_ready(false), 0); /* socket was closed */ assert_int_equal(close_call_count, 1); diff --git a/src/gurt/tests/test_gurt.c b/src/gurt/tests/test_gurt.c index e9f8f435486..40f9256e724 100644 --- a/src/gurt/tests/test_gurt.c +++ b/src/gurt/tests/test_gurt.c @@ -129,13 +129,11 @@ test_d_errstr(void **state) assert_string_equal(value, "DER_UNKNOWN"); /* Check the end of the DAOS error numbers. 
*/ - value = d_errstr(-DER_DIV_BY_ZERO); - assert_string_equal(value, "DER_DIV_BY_ZERO"); - value = d_errstr(-2047); - assert_string_equal(value, "DER_DIV_BY_ZERO"); - value = d_errstr(-(DER_DIV_BY_ZERO + 1)); - assert_string_equal(value, "DER_OVERLOAD_RETRY"); - value = d_errstr(-(DER_OVERLOAD_RETRY + 1)); + value = d_errstr(-DER_NOT_RESUME); + assert_string_equal(value, "DER_NOT_RESUME"); + value = d_errstr(-2049); + assert_string_equal(value, "DER_NOT_RESUME"); + value = d_errstr(-(DER_NOT_RESUME + 1)); assert_string_equal(value, "DER_UNKNOWN"); } diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 33fad3a271e..515c8cdb923 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -184,7 +184,9 @@ typedef void *crt_bulk_array_t; /**< abstract bulk array handle */ /** RPC flags enumeration */ enum crt_rpc_flags { /** send CORPC to filter_ranks only */ - CRT_RPC_FLAG_FILTER_INVERT = (1U << 1) + CRT_RPC_FLAG_FILTER_INVERT = (1U << 1), + /** Do not invoke RPC handler on local node when fail to forward corpc to children. */ + CRT_RPC_FLAG_CO_FAILOUT = (1U << 2), }; struct crt_rpc; diff --git a/src/include/daos/btree.h b/src/include/daos/btree.h index 63be2132fd0..cea3833597f 100644 --- a/src/include/daos/btree.h +++ b/src/include/daos/btree.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -653,8 +653,8 @@ enum { DBTREE_VOS_BEGIN = 10, DBTREE_VOS_END = DBTREE_VOS_BEGIN + 9, DBTREE_DSM_BEGIN = 20, - DBTREE_DSM_END = DBTREE_DSM_BEGIN + 9, - DBTREE_SMD_BEGIN = 30, + DBTREE_DSM_END = DBTREE_DSM_BEGIN + 19, + DBTREE_SMD_BEGIN = 40, DBTREE_SMD_END = DBTREE_SMD_BEGIN + 9, }; diff --git a/src/include/daos/btree_class.h b/src/include/daos/btree_class.h index 4f3faf5ad7a..1d9e46880bd 100644 --- a/src/include/daos/btree_class.h +++ b/src/include/daos/btree_class.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -100,15 +100,36 @@ extern btr_ops_t dbtree_recx_ops; /** Integer-fixed-value pairs * - * Each key is uint64_t integer, values have fixed size for a given key. Keys ordered numerically. + * Each key is uint64_t integer, values have fixed size for + * a given key. Keys ordered numerically. */ #define DBTREE_CLASS_IFV (DBTREE_DSM_BEGIN + 8) +extern btr_ops_t dbtree_ifv_ops; /** * Used for recording object targets with sparse ranks for collective operation. The key is rank#. 
*/ #define DBTREE_CLASS_COLL (DBTREE_DSM_BEGIN + 9) -extern btr_ops_t dbtree_ifv_ops; +/** + * DAOS check pool tree, the key is pool uuid + */ +#define DBTREE_CLASS_CHK_POOL (DBTREE_DSM_BEGIN + 10) + +/** + * DAOS check rank tree, the key is rank ID + */ +#define DBTREE_CLASS_CHK_RANK (DBTREE_DSM_BEGIN + 11) + +/** + * DAOS check pending action tree, the key is 64-bit sequence + */ +#define DBTREE_CLASS_CHK_PA (DBTREE_DSM_BEGIN + 12) + +/** + * DAOS check container tree, the key is container uuid + */ +#define DBTREE_CLASS_CHK_CONT (DBTREE_DSM_BEGIN + 13) + #endif /* __DAOS_SRV_BTREE_CLASS_H__ */ diff --git a/src/include/daos/common.h b/src/include/daos/common.h index e03c14a85dd..0865b24d5cc 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -497,6 +497,13 @@ int daos_sgl_processor(d_sg_list_t *sgl, bool check_buf, daos_sgl_process_cb process_cb, void *cb_args); char *daos_str_trimwhite(char *str); + +static inline bool +daos_iov_empty(d_iov_t *iov) +{ + return iov == NULL || iov->iov_buf == NULL || iov->iov_len == 0; +} + int daos_iov_copy(d_iov_t *dst, d_iov_t *src); int daos_iov_alloc(d_iov_t *iov, daos_size_t size, bool set_full); void daos_iov_free(d_iov_t *iov); @@ -545,8 +552,6 @@ void daos_iov_append(d_iov_t *iov, void *buf, uint64_t buf_len); ({ type __x = (x); type __y = (y); __x > __y ? 
__x : __y; }) #endif -#define DAOS_UUID_STR_SIZE 37 /* 36 + 1 for '\0' */ - /* byte swapper */ #define D_SWAP16(x) bswap_16(x) #define D_SWAP32(x) bswap_32(x) @@ -890,6 +895,17 @@ enum { #define DAOS_POOL_EVICT_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa0) +#define DAOS_CHK_CONT_ORPHAN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb0) +#define DAOS_CHK_CONT_BAD_LABEL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb1) +#define DAOS_CHK_LEADER_BLOCK (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb2) +#define DAOS_CHK_LEADER_FAIL_REGPOOL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb3) +#define DAOS_CHK_PS_NOTIFY_LEADER (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb4) +#define DAOS_CHK_PS_NOTIFY_ENGINE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb5) +#define DAOS_CHK_SYNC_ORPHAN_PROCESS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb6) +#define DAOS_CHK_FAIL_REPORT_POOL1 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb7) +#define DAOS_CHK_FAIL_REPORT_POOL2 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb8) +#define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9) + /* WAL && checkpoint failure inject */ #define DAOS_WAL_NO_REPLAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x100) #define DAOS_WAL_FAIL_REPLAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x101) @@ -1008,6 +1024,7 @@ int crt_proc_daos_prop_t(crt_proc_t proc, crt_proc_op_t proc_op, daos_prop_t **data); int crt_proc_struct_daos_acl(crt_proc_t proc, crt_proc_op_t proc_op, struct daos_acl **data); +int crt_proc_d_sg_list_t(crt_proc_t proc, crt_proc_op_t proc_op, d_sg_list_t *p); bool daos_prop_valid(daos_prop_t *prop, bool pool, bool input); daos_prop_t *daos_prop_dup(daos_prop_t *prop, bool pool, bool input); diff --git a/src/include/daos/debug.h b/src/include/daos/debug.h index 17a4c2bdffd..bcf8834c517 100644 --- a/src/include/daos/debug.h +++ b/src/include/daos/debug.h @@ -47,6 +47,7 @@ ACTION(drpc, drpc, arg) \ ACTION(security, security, arg) \ ACTION(dtx, dtx, arg) \ + ACTION(chk, chk, arg) \ ACTION(dfuse, dfuse, arg) \ ACTION(il, il, arg) \ ACTION(csum, csum, arg) \ diff --git a/src/include/daos/drpc_modules.h 
b/src/include/daos/drpc_modules.h index 8d97f23d175..b7eb8233a28 100644 --- a/src/include/daos/drpc_modules.h +++ b/src/include/daos/drpc_modules.h @@ -68,6 +68,11 @@ enum drpc_mgmt_method { DRPC_METHOD_MGMT_POOL_UPGRADE = 239, DRPC_METHOD_MGMT_POOL_QUERY_TARGETS = 240, DRPC_METHOD_MGMT_LED_MANAGE = 241, + DRPC_METHOD_MGMT_CHK_START = 242, + DRPC_METHOD_MGMT_CHK_STOP = 243, + DRPC_METHOD_MGMT_CHK_QUERY = 244, + DRPC_METHOD_MGMT_CHK_PROP = 245, + DRPC_METHOD_MGMT_CHK_ACT = 246, NUM_DRPC_MGMT_METHODS /* Must be last */ }; @@ -77,6 +82,10 @@ enum drpc_srv_method { DRPC_METHOD_SRV_GET_POOL_SVC = 303, DRPC_METHOD_SRV_CLUSTER_EVENT = 304, DRPC_METHOD_SRV_POOL_FIND_BYLABEL = 305, + DRPC_METHOD_CHK_LIST_POOL = 306, + DRPC_METHOD_CHK_REG_POOL = 307, + DRPC_METHOD_CHK_DEREG_POOL = 308, + DRPC_METHOD_CHK_REPORT = 309, NUM_DRPC_SRV_METHODS /* Must be last */ }; diff --git a/src/include/daos/dtx.h b/src/include/daos/dtx.h index 1ee783a5e1f..ca719077a14 100644 --- a/src/include/daos/dtx.h +++ b/src/include/daos/dtx.h @@ -266,6 +266,7 @@ daos_dti_equal(struct dtx_id *dti0, struct dtx_id *dti1) } #define DF_DTI DF_UUID"."DF_X64 +#define DF_DTIF DF_UUIDF"."DF_X64 #define DP_DTI(dti) DP_UUID((dti)->dti_uuid), (dti)->dti_hlc enum daos_ops_intent { diff --git a/src/include/daos/pool_map.h b/src/include/daos/pool_map.h index a61a4106690..7e8069dc01c 100644 --- a/src/include/daos/pool_map.h +++ b/src/include/daos/pool_map.h @@ -63,12 +63,16 @@ enum pool_component_flags { * indicate when in status PO_COMP_ST_DOWNOUT, it is changed from * PO_COMP_ST_DOWN (rather than from PO_COMP_ST_DRAIN). */ - PO_COMPF_DOWN2OUT = 1, + PO_COMPF_DOWN2OUT = (1 << 0), /** * If the target status is UP, then it indicates the UP status is * from DOWN directly, instead of NEW and DOWNOUT. */ - PO_COMPF_DOWN2UP = 2, + PO_COMPF_DOWN2UP = (1 << 1), + /** + * The component has been processed by DAOS check, only in DRAM. 
+ */ + PO_COMPF_CHK_DONE = (1 << 2), }; #define co_in_ver co_out_ver @@ -223,6 +227,27 @@ static inline unsigned int pool_buf_nr(size_t size) sizeof(struct pool_component); } +static inline const char * +pool_map_status2name(uint32_t status) +{ + switch (status) { + case PO_COMP_ST_UNKNOWN: + return "unknown"; + case PO_COMP_ST_NEW: + return "new"; + case PO_COMP_ST_UP: + return "up"; + case PO_COMP_ST_DOWN: + return "down"; + case PO_COMP_ST_DOWNOUT: + return "downout"; + case PO_COMP_ST_DRAIN: + return "drain"; + default: + D_ASSERTF(0, "Invalid status %u\n", status); + } +} + struct pool_map; struct pool_buf *pool_buf_alloc(unsigned int nr); @@ -246,6 +271,7 @@ void pool_map_print(struct pool_map *map); int pool_map_set_version(struct pool_map *map, uint32_t version); uint32_t pool_map_get_version(struct pool_map *map); +uint32_t pool_map_bump_version(struct pool_map *map); int pool_map_get_failed_cnt(struct pool_map *map, uint32_t domain); diff --git a/src/include/daos/rpc.h b/src/include/daos/rpc.h index b46dd7e2b76..27cf925b061 100644 --- a/src/include/daos/rpc.h +++ b/src/include/daos/rpc.h @@ -53,7 +53,8 @@ X(DAOS_SEC_MODULE, 9) /** security framework */ \ X(DAOS_DTX_MODULE, 10) /** DTX */ \ X(DAOS_PIPELINE_MODULE, 11) \ - X(DAOS_NR_MODULE, 12) /** number of defined modules */ \ + X(DAOS_CHK_MODULE, 12) /** check */ \ + X(DAOS_NR_MODULE, 13) /** number of defined modules */ \ X(DAOS_MAX_MODULE, 64) /** Size of uint64_t see dmg profile */ enum daos_module_id { @@ -114,6 +115,8 @@ enum daos_rpc_type { DAOS_REQ_SWIM, /** Per VOS target request */ DAOS_REQ_TGT, + /** The DAOS check request handled by cart, send/recv by tag 0. 
*/ + DAOS_REQ_CHK, }; struct daos_req_comm_in { @@ -172,6 +175,7 @@ daos_rpc_tag(int req_type, int tgt_idx) case DAOS_REQ_REBUILD: case DAOS_REQ_IV: case DAOS_REQ_BCAST: + case DAOS_REQ_CHK: return 0; default: D_ASSERTF(0, "bad req_type %d.\n", req_type); diff --git a/src/include/daos/rsvc.h b/src/include/daos/rsvc.h index f9f006f6f64..95d96f5d95c 100644 --- a/src/include/daos/rsvc.h +++ b/src/include/daos/rsvc.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2017-2021 Intel Corporation. + * (C) Copyright 2017-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ diff --git a/src/include/daos/tests_lib.h b/src/include/daos/tests_lib.h index 1d3960e76aa..16edb71b55d 100644 --- a/src/include/daos/tests_lib.h +++ b/src/include/daos/tests_lib.h @@ -97,6 +97,124 @@ typedef struct { int n_tgtidx; } device_list; +enum test_cr_start_flags { + TCSF_NONE = 0, + TCSF_DRYRUN = (1 << 0), + TCSF_RESET = (1 << 1), + TCSF_FAILOUT = (1 << 2), + TCSF_AUTO = (1 << 3), + TCSF_ORPHAN = (1 << 4), + TCSF_NO_FAILOUT = (1 << 5), + TCSF_NO_AUTO = (1 << 6), +}; + +enum test_cr_policy_flags { + TCPF_NONE = 0, + TCPF_RESET = (1 << 0), + TCPF_INTERACT = (1 << 1), +}; + +enum test_cr_ins_status { + TCIS_INIT = 0, + TCIS_RUNNING = 1, + TCIS_COMPLETED = 2, + TCIS_STOPPED = 3, + TCIS_FAILED = 4, + TCIS_PAUSED = 5, + TCIS_IMPLICATED = 6, +}; + +enum test_cr_pool_status { + TCPS_UNCHECKED = 0, + TCPS_CHECKING = 1, + TCPS_CHECKED = 2, + TCPS_FAILED = 3, + TCPS_PAUSED = 4, + TCPS_PENDING = 5, + TCPS_STOPPED = 6, + TCPS_IMPLICATED = 7, +}; + +enum test_cr_phase { + TCP_PREPARE = 0, + TCP_POOL_LIST = 1, + TCP_POOL_MBS = 2, + TCP_POOL_CLEANUP = 3, + TCP_CONT_LIST = 4, + TCP_CONT_CLEANUP = 5, + TCP_DTX_RESYNC = 6, + TCP_OBJ_SCRUB = 7, + TCP_REBUILD = 8, + TCP_AGGREGATION = 9, + TCP_DONE = 10, +}; + +enum test_cr_class { + TCC_NONE = 0, + TCC_POOL_LESS_SVC_WITH_QUORUM = 1, + TCC_POOL_LESS_SVC_WITHOUT_QUORUM = 2, + TCC_POOL_MORE_SVC = 3, + TCC_POOL_NONEXIST_ON_MS = 4, + 
TCC_POOL_NONEXIST_ON_ENGINE = 5, + TCC_POOL_BAD_SVCL = 6, + TCC_POOL_BAD_LABEL = 7, + TCC_ENGINE_NONEXIST_IN_MAP = 8, + TCC_ENGINE_DOWN_IN_MAP = 9, + TCC_ENGINE_HAS_NO_STORAGE = 10, + TCC_CONT_NONEXIST_ON_PS = 11, + TCC_CONT_BAD_LABEL = 12, + TCC_DTX_CORRUPTED = 13, + TCC_DTX_ORPHAN = 14, + TCC_CSUM_LOST = 15, + TCC_CSUM_FAILURE = 16, + TCC_OBJ_LOST_REP = 17, + TCC_OBJ_LOST_EC_SHARD = 18, + TCC_OBJ_LOST_EC_DATA = 19, + TCC_OBJ_DATA_INCONSIST = 20, + TCC_UNKNOWN = 100, +}; + +enum test_cr_action { + TCA_DEFAULT = 0, + TCA_INTERACT = 1, + TCA_IGNORE = 2, + TCA_DISCARD = 3, + TCA_READD = 4, + TCA_TRUST_MS = 5, + TCA_TRUST_PS = 6, + TCA_TRUST_TARGET = 7, + TCA_TRUST_MAJORITY = 8, + TCA_TRUST_LATEST = 9, + TCA_TRUST_OLDEST = 10, + TCA_TRUST_EC_PARITY = 11, + TCA_TRUST_EC_DATA = 12, +}; + +struct daos_check_pool_info { + uuid_t dcpi_uuid; + char *dcpi_status; + char *dcpi_phase; +}; + +struct daos_check_report_info { + uuid_t dcri_uuid; + uint64_t dcri_seq; + uint32_t dcri_class; + uint32_t dcri_act; + int dcri_result; + int dcri_option_nr; + int dcri_options[4]; +}; + +struct daos_check_info { + char *dci_status; + char *dci_phase; + int dci_pool_nr; + int dci_report_nr; + struct daos_check_pool_info *dci_pools; + struct daos_check_report_info *dci_reports; +}; + /** Initialize an SGL with a variable number of IOVs and set the IOV buffers * to the value of the strings passed. This will allocate memory for the iov * structures as well as the iov buffers, so d_sgl_fini(sgl, true) must be @@ -351,6 +469,20 @@ dmg_pool_set_prop(const char *dmg_config_file, const char *prop_name, const char *prop_value, const uuid_t pool_uuid); +/** + * Get property for the pool. + * + * \param dmg_config_file [IN] DMG config file. + * \param label [IN] The pool label, can be NULL. + * \param uuid [IN] UUID of the pool. + * \param name [IN] the name of the property. + * \param value [OUT] the value of the property. + * + * \return Zero on success, negative value if error. 
+ */ +int dmg_pool_get_prop(const char *dmg_config_file, const char *label, const uuid_t uuid, + const char *name, char **value); + /** * List all disks in the specified DAOS system. * @@ -423,6 +555,24 @@ int dmg_system_stop_rank(const char *dmg_config_file, d_rank_t rank, int force); */ int dmg_system_start_rank(const char *dmg_config_file, d_rank_t rank); +/** + * Reintegrate a rank into the system. + * + * \param dmg_config_file + * [IN] DMG config file + * \param rank [IN] Rank to be reintegrated. + */ +int dmg_system_reint_rank(const char *dmg_config_file, d_rank_t rank); + +/** + * Exclude a rank from the system. + * + * \param dmg_config_file + * [IN] DMG config file + * \param rank [IN] Rank to be excluded. + */ +int dmg_system_exclude_rank(const char *dmg_config_file, d_rank_t rank); + const char *daos_target_state_enum_to_str(int state); /* Used to easily setup data needed for tests */ @@ -447,4 +597,95 @@ void td_init_array_values(struct test_data *td, uint32_t iod_nr, uint32_t recx_n uint32_t data_size, uint32_t chunksize); void td_destroy(struct test_data *td); +/** + * Inject specified fault to simulate some system inconsistency. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param uuid [IN] The UUID for the pool for which the inconsistency to be injected. + * \param mgmt [IN] Inject fault on MS or PS + * \param fault [IN] Which inconsistency to be simulated. + * + * \return Zero on success, negative value if error. + */ +int dmg_fault_inject(const char *dmg_config_file, uuid_t uuid, bool mgmt, const char *fault); + +/** + * Switch DAOS check mode. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param enable [IN] Enable or disable check mode. + * + * \return Zero on success, negative value if error. + */ +int dmg_check_switch(const char *dmg_config_file, bool enable); + +/** + * Start DAOS checker. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param flags [IN] The flags to start the checker. 
+ * \param pool_nr [IN] The count of pools to be checked. + * \param uuids [IN] The UUID list for pools on which to start the checker. + * \param policies [IN] The policies for handling detected inconsistent issues. + * + * \return Zero on success, negative value if error. + */ +int dmg_check_start(const char *dmg_config_file, uint32_t flags, uint32_t pool_nr, uuid_t uuids[], + const char *policies); + +/** + * Stop DAOS checker. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param pool_nr [IN] The count of pools to stop the check. + * \param uuids [IN] The UUID list for pools on which to stop the checker. + * + * \return Zero on success, negative value if error. + */ +int dmg_check_stop(const char *dmg_config_file, uint32_t pool_nr, uuid_t uuids[]); + +/** + * Query DAOS checker. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param pool_nr [IN] The count of pools to query the check. + * \param uuids [IN] The UUID list for pools on which to query the checker. + * \param dci [OUT] The query results. + * + * \return Zero on success, negative value if error. + */ +int dmg_check_query(const char *dmg_config_file, uint32_t pool_nr, uuid_t uuids[], + struct daos_check_info *dci); + +/** + * Execute the specified check repair action. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param seq [IN] The sequence# of the inconsistency to be repaired. + * \param opt [IN] The option for what action to handle the inconsistency. + * \param for_all [IN] Whether the repair decision is applicable for the other issues + * with the same inconsistency class or not. + * + * \return Zero on success, negative value if error. + */ +int dmg_check_repair(const char *dmg_config_file, uint64_t seq, uint32_t opt, bool for_all); + +/** + * Set inconsistency handle policy for DAOS checker. + * + * \param dmg_config_file + * [IN] DMG config file. + * \param flags [IN] The flags for set DAOS checker policy. + * \param policies [IN] The policies to be set. 
+ * + * \return Zero on success, negative value if error. + */ +int dmg_check_set_policy(const char *dmg_config_file, uint32_t flags, const char *policies); + #endif /* __DAOS_TESTS_LIB_H__ */ diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index f3a3cdddcdf..d8521461663 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -211,7 +211,8 @@ extern "C" { ACTION(DER_CHKPT_BUSY, Page is temporarily read only due to checkpointing) \ ACTION(DER_DIV_BY_ZERO, Division by zero) \ /** Target is overload, retry RPC */ \ - ACTION(DER_OVERLOAD_RETRY, "retry later because of overloaded service") + ACTION(DER_OVERLOAD_RETRY, "retry later because of overloaded service") \ + ACTION(DER_NOT_RESUME, Cannot resume former DAOS check instance) /** Defines the gurt error codes */ #define D_FOREACH_ERR_RANGE(ACTION) \ diff --git a/src/include/daos_prop.h b/src/include/daos_prop.h index e3dc9d918a7..85826018ab9 100644 --- a/src/include/daos_prop.h +++ b/src/include/daos_prop.h @@ -610,28 +610,8 @@ daos_label_is_valid(const char *label) } /** Check to see if it could be a valid UUID */ - if (maybe_uuid && strnlen(label, 36) == 36) { - bool is_uuid = true; - const char *p; - - /** Implement the check directly to avoid uuid_parse() overhead */ - for (i = 0, p = label; i < 36; i++, p++) { - if (i == 8 || i == 13 || i == 18 || i == 23) { - if (*p != '-') { - is_uuid = false; - break; - } - continue; - } - if (!isxdigit(*p)) { - is_uuid = false; - break; - } - } - - if (is_uuid) - return false; - } + if (maybe_uuid && daos_is_valid_uuid_string(label)) + return false; return true; } @@ -639,6 +619,15 @@ daos_label_is_valid(const char *label) /* default data threshold size of 4KiB */ #define DAOS_PROP_PO_DATA_THRESH_DEFAULT (1UL << 12) +/* For the case of no label is set for the pool. 
*/ +#define DAOS_PROP_NO_PO_LABEL "pool_label_not_set" + +/* Default container label */ +#define DEFAULT_CONT_LABEL "container_label_not_set" + +/* For the case of no label is set for the container. */ +#define DAOS_PROP_NO_CO_LABEL DEFAULT_CONT_LABEL + /** * Check if DAOS pool performance domain string is valid, string * has same requirement as label. diff --git a/src/include/daos_srv/container.h b/src/include/daos_srv/container.h index 058a896795d..c65c2054f3e 100644 --- a/src/include/daos_srv/container.h +++ b/src/include/daos_srv/container.h @@ -185,6 +185,7 @@ int ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, int n_pool_hdls, crt_context_t ctx); int ds_cont_local_close(uuid_t cont_hdl_uuid); +int ds_cont_chk_post(struct ds_pool_child *pool_child); int ds_cont_child_start_all(struct ds_pool_child *pool_child); void ds_cont_child_stop_all(struct ds_pool_child *pool_child); @@ -264,5 +265,15 @@ typedef int(*cont_rdb_iter_cb_t)(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_ int ds_cont_rdb_iterate(struct cont_svc *svc, cont_rdb_iter_cb_t iter_cb, void *cb_arg); int ds_cont_rf_check(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx); +int ds_cont_existence_check(struct cont_svc *svc, uuid_t uuid, daos_prop_t **prop); + +int ds_cont_destroy_orphan(struct cont_svc *svc, uuid_t uuid); + +int ds_cont_iterate_labels(struct cont_svc *svc, rdb_iterate_cb_t cb, void *arg); + +int ds_cont_set_label(struct cont_svc *svc, uuid_t uuid, daos_prop_t *prop_in, + daos_prop_t *prop_old, bool for_svc); + int ds_cont_fetch_ec_agg_boundary(void *ns, uuid_t cont_uuid); + #endif /* ___DAOS_SRV_CONTAINER_H_ */ diff --git a/src/include/daos_srv/daos_chk.h b/src/include/daos_srv/daos_chk.h new file mode 100644 index 00000000000..93fc2a75c9c --- /dev/null +++ b/src/include/daos_srv/daos_chk.h @@ -0,0 +1,89 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DAOS_CHK_H__ +#define __DAOS_CHK_H__ + +#include +#include + +struct chk_policy { + uint32_t cp_class; + uint32_t cp_action; +}; + +/* Time information on related component: system, pool or target. */ +struct chk_time { + /* The time of check instance being started on the component. */ + uint64_t ct_start_time; + union { + /* The time of the check instance completed, failed or stopped on the component. */ + uint64_t ct_stop_time; + /* The estimated remaining time to complete the check on the component. */ + uint64_t ct_left_time; + }; +}; + +/* Inconsistency statistics on related component: system, pool or target. */ +struct chk_statistics { + /* The count of total found inconsistency on the component. */ + uint64_t cs_total; + /* The count of repaired inconsistency on the component. */ + uint64_t cs_repaired; + /* The count of ignored inconsistency on the component. */ + uint64_t cs_ignored; + /* The count of inconsistencies that failed to be repaired on the component. 
*/ + uint64_t cs_failed; +}; + +struct chk_query_target { + d_rank_t cqt_rank; + uint32_t cqt_tgt; + uint32_t cqt_ins_status; + uint32_t cqt_padding; + struct chk_statistics cqt_statistics; + struct chk_time cqt_time; +}; + +struct chk_query_pool_shard { + uuid_t cqps_uuid; + uint32_t cqps_status; + uint32_t cqps_phase; + struct chk_statistics cqps_statistics; + struct chk_time cqps_time; + uint32_t cqps_rank; + uint32_t cqps_target_nr; + struct chk_query_target *cqps_targets; +}; + +struct chk_list_pool { + uuid_t clp_uuid; + char *clp_label; + d_rank_list_t *clp_svcreps; +}; + +typedef int (*chk_query_head_cb_t)(uint32_t ins_status, uint32_t ins_phase, + struct chk_statistics *inconsistency, struct chk_time *time, + size_t n_pools, void *buf); + +typedef int (*chk_query_pool_cb_t)(struct chk_query_pool_shard *shard, uint32_t idx, void *buf); + +typedef int (*chk_prop_cb_t)(void *buf, struct chk_policy *policies, int cnt, uint32_t flags); + +int chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, + struct chk_policy *policies, int pool_nr, uuid_t pools[], + uint32_t api_flags, int phase); + +int chk_leader_stop(int pool_nr, uuid_t pools[]); + +int chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, + chk_query_pool_cb_t pool_cb, void *buf); + +int chk_leader_prop(chk_prop_cb_t prop_cb, void *buf); + +int chk_leader_act(uint64_t seq, uint32_t act, bool for_all); + +#endif /* __DAOS_CHK_H__ */ diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index bd7ed8e9fcc..1d2a9c2f14f 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -871,4 +871,6 @@ struct dss_chore { int dss_chore_delegate(struct dss_chore *chore, dss_chore_func_t func); void dss_chore_diy(struct dss_chore *chore, dss_chore_func_t func); +bool engine_in_check(void); + #endif /* __DSS_API_H__ */ diff --git a/src/include/daos_srv/daos_mgmt_srv.h b/src/include/daos_srv/daos_mgmt_srv.h index 
7154f46db4f..d00b821442b 100644 --- a/src/include/daos_srv/daos_mgmt_srv.h +++ b/src/include/daos_srv/daos_mgmt_srv.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2021 Intel Corporation. + * (C) Copyright 2016-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -27,5 +27,17 @@ ds_mgmt_tgt_file(const uuid_t pool_uuid, const char *fname, int *idx, char **fpath); int ds_mgmt_tgt_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg); +int +ds_mgmt_newborn_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg); +int +ds_mgmt_zombie_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg); +int +ds_mgmt_pool_exist(uuid_t uuid); +int +ds_mgmt_tgt_pool_exist(uuid_t uuid, char **path); +int +ds_mgmt_tgt_pool_destroy_ranks(uuid_t pool_uuid, d_rank_list_t *ranks); +int +ds_mgmt_tgt_pool_shard_destroy(uuid_t pool_uuid, int shard_idx, d_rank_t rank); #endif /* __MGMT_SRV_H__ */ diff --git a/src/include/daos_srv/iv.h b/src/include/daos_srv/iv.h index b5ee025ea1c..b453fa121ab 100644 --- a/src/include/daos_srv/iv.h +++ b/src/include/daos_srv/iv.h @@ -295,6 +295,7 @@ enum iv_key { * other servers */ IV_CONT_AGG_EPOCH_BOUNDRY, + IV_CHK, }; int ds_iv_fetch(struct ds_iv_ns *ns, struct ds_iv_key *key, d_sg_list_t *value, @@ -310,10 +311,12 @@ int ds_iv_ns_create(crt_context_t ctx, uuid_t pool_uuid, crt_group_t *grp, unsigned int *ns_id, struct ds_iv_ns **p_iv_ns); void ds_iv_ns_update(struct ds_iv_ns *ns, unsigned int master_rank, uint64_t term); +void ds_iv_ns_cleanup(struct ds_iv_ns *ns); void ds_iv_ns_stop(struct ds_iv_ns *ns); void ds_iv_ns_leader_stop(struct ds_iv_ns *ns); void ds_iv_ns_start(struct ds_iv_ns *ns); void ds_iv_ns_put(struct ds_iv_ns *ns); +void ds_iv_ns_get(struct ds_iv_ns *ns); unsigned int ds_iv_ns_id_get(void *ns); diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index d4c42feaa65..131bb9ac030 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 
2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,6 +24,10 @@ #include #include #include +#include + +/* Pool service (opaque) */ +struct ds_pool_svc; /** * Each individual object layout format, like oid layout, dkey to group, @@ -173,7 +177,9 @@ struct ds_pool_child { int spc_ref; ABT_eventual spc_ref_eventual; - uint32_t spc_discard_done:1; + uint64_t spc_discard_done:1, + spc_no_storage:1; /* The pool shard has no storage. */ + uint32_t spc_reint_mode; uint32_t *spc_state; /* Pointer to ds_pool->sp_states[i] */ /** @@ -252,7 +258,7 @@ uint32_t ds_pool_child_state(uuid_t pool_uuid, uint32_t tgt_id); int ds_pool_bcast_create(crt_context_t ctx, struct ds_pool *pool, enum daos_module_id module, crt_opcode_t opcode, uint32_t version, crt_rpc_t **rpc, crt_bulk_t bulk_hdl, - d_rank_list_t *excluded_list); + d_rank_list_t *excluded_list, void *priv); int ds_pool_map_buf_get(uuid_t uuid, d_iov_t *iov, uint32_t *map_ver); @@ -265,6 +271,8 @@ int ds_pool_tgt_finish_rebuild(uuid_t pool_uuid, struct pool_target_id_list *lis int ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf, unsigned int map_version); +int ds_pool_chk_post(uuid_t uuid); +int ds_pool_start_with_svc(uuid_t uuid); int ds_pool_start(uuid_t uuid); void ds_pool_stop(uuid_t uuid); int ds_pool_extend(uuid_t pool_uuid, int ntargets, const d_rank_list_t *rank_list, int ndomains, @@ -335,7 +343,7 @@ int ds_pool_iv_srv_hdl_fetch(struct ds_pool *pool, uuid_t *pool_hdl_uuid, uuid_t *cont_hdl_uuid); int ds_pool_svc_term_get(uuid_t uuid, uint64_t *term); -int ds_pool_svc_global_map_version_get(uuid_t uuid, uint32_t *global_ver); +int ds_pool_svc_query_map_dist(uuid_t uuid, uint32_t *version, bool *idle); int ds_pool_child_map_refresh_sync(struct ds_pool_child *dpc); @@ -365,6 +373,14 @@ int ds_pool_svc_check_evict(uuid_t pool_uuid, d_rank_list_t *ranks, int ds_pool_target_status_check(struct ds_pool *pool, uint32_t id, 
uint8_t matched_status, struct pool_target **p_tgt); +int ds_pool_mark_connectable(struct ds_pool_svc *ds_svc); +int ds_pool_svc_load_map(struct ds_pool_svc *ds_svc, struct pool_map **map); +int ds_pool_svc_flush_map(struct ds_pool_svc *ds_svc, struct pool_map *map); +int ds_pool_svc_schedule_reconf(struct ds_pool_svc *svc); +int ds_pool_svc_update_label(struct ds_pool_svc *ds_svc, const char *label); +int ds_pool_svc_evict_all(struct ds_pool_svc *ds_svc); +struct ds_pool *ds_pool_svc2pool(struct ds_pool_svc *ds_svc); +struct cont_svc *ds_pool_ps2cs(struct ds_pool_svc *ds_svc); void ds_pool_disable_exclude(void); void ds_pool_enable_exclude(void); @@ -425,6 +441,29 @@ ds_pool_get_version(struct ds_pool *pool) return ver; } +/** + * Pool service replica clue + * + * Pool service replica info gathered when glancing at a pool. + */ +struct ds_pool_svc_clue { + struct rdb_clue psc_db_clue; + uint32_t psc_map_version; /**< if 0, empty DB replica */ +}; + +/** Pool parent directory */ +enum ds_pool_dir { + DS_POOL_DIR_NORMAL, + DS_POOL_DIR_NEWBORN, + DS_POOL_DIR_ZOMBIE +}; + +enum ds_pool_tgt_status { + DS_POOL_TGT_NONEXIST, + DS_POOL_TGT_EMPTY, + DS_POOL_TGT_NORMAL +}; + int ds_start_chkpt_ult(struct ds_pool_child *child); void @@ -432,4 +471,55 @@ void int ds_pool_lookup_hdl_cred(struct rdb_tx *tx, uuid_t pool_uuid, uuid_t pool_hdl_uuid, d_iov_t *cred); +/** + * Pool clue + * + * Pool shard and service replica (if applicable) info gathered when glancing + * at a pool. The pc_uuid, pc_dir, and pc_rc fields are always valid; the + * pc_svc_clue field is valid only if pc_rc is positive value. + */ +struct ds_pool_clue { + uuid_t pc_uuid; + d_rank_t pc_rank; + enum ds_pool_dir pc_dir; + int pc_rc; + int pc_tgt_nr; + uint32_t pc_label_len; + /* + * DAOS check phase for current pool shard. Different pool shards may claim different + * check phase because some shards may has ever missed the RPC for check phase update. 
+ */ + uint32_t pc_phase; + struct ds_pool_svc_clue *pc_svc_clue; + char *pc_label; + uint32_t *pc_tgt_status; +}; + +void ds_pool_clue_init(uuid_t uuid, enum ds_pool_dir dir, struct ds_pool_clue *clue); +void ds_pool_clue_fini(struct ds_pool_clue *clue); + +/** Array of ds_pool_clue objects */ +struct ds_pool_clues { + struct ds_pool_clue *pcs_array; + int pcs_len; + int pcs_cap; +}; + +/** + * If this callback returns 0, the pool with \a uuid will be glanced at; + * otherwise, the pool with \a uuid will be skipped. + */ +typedef int (*ds_pool_clues_init_filter_t)(uuid_t uuid, void *arg, int *phase); + +int ds_pool_clues_init(ds_pool_clues_init_filter_t filter, void *filter_arg, + struct ds_pool_clues *clues_out); +void ds_pool_clues_fini(struct ds_pool_clues *clues); +void ds_pool_clues_print(struct ds_pool_clues *clues); + +int ds_pool_check_svc_clues(struct ds_pool_clues *clues, int *advice_out); + +int ds_pool_svc_lookup_leader(uuid_t uuid, struct ds_pool_svc **ds_svcp, struct rsvc_hint *hint); + +void ds_pool_svc_put_leader(struct ds_pool_svc *ds_svc); + #endif /* __DAOS_SRV_POOL_H__ */ diff --git a/src/include/daos_srv/ras.h b/src/include/daos_srv/ras.h index 7f67a12d8fd..21c00bd42ca 100644 --- a/src/include/daos_srv/ras.h +++ b/src/include/daos_srv/ras.h @@ -13,6 +13,7 @@ #include #include +#include #define DAOS_RAS_STR_FIELD_SIZE 128 #define DAOS_RAS_ID_FIELD_SIZE 64 @@ -232,4 +233,52 @@ ds_notify_pool_svc_update(uuid_t *pool, d_rank_list_t *svcl, uint64_t version); int ds_notify_swim_rank_dead(d_rank_t rank, uint64_t incarnation); +/** + * List all the known pools from control plane (MS). + * + * \param[out] clp The pools list. + * + * \retval Positive value for the count of pools. + * Negative value if error. + */ +int +ds_chk_listpool_upcall(struct chk_list_pool **clp); + +/** + * Register the pool to control plane (MS). + * + * \param[in] seq DAOS Check event sequence, unique for the instance. + * \param[in] uuid The pool uuid. 
+ * \param[in] label The pool label, optional. + * \param[in] svcreps Ranks for the pool service. + * + * \retval Zero on success, non-zero otherwise. + */ +int +ds_chk_regpool_upcall(uint64_t seq, uuid_t uuid, char *label, d_rank_list_t *svcreps); + +/** + * Deregister the pool from control plane (MS). + * + * \param[in] seq DAOS Check event sequence, unique for the instance. + * \param[in] uuid The pool uuid. + * + * \retval Zero on success, non-zero otherwise. + */ +int +ds_chk_deregpool_upcall(uint64_t seq, uuid_t uuid); + +/** + * Report inconsistency to control plane (MS). + * + * \param[in] rpt The pointer to Chk__CheckReport. + * + * \retval Zero on success, non-zero otherwise. + */ +int +ds_chk_report_upcall(void *rpt); + +void +ds_chk_free_pool_list(struct chk_list_pool *clp, uint32_t nr); + #endif /* __DAOS_RAS_H_ */ diff --git a/src/include/daos_srv/rdb.h b/src/include/daos_srv/rdb.h index 63581bf84fa..ab8e0af5f6e 100644 --- a/src/include/daos_srv/rdb.h +++ b/src/include/daos_srv/rdb.h @@ -116,6 +116,27 @@ struct rdb_storage; struct rdb_cbs; +/** + * Database clue returned by rdb_glance + * + * Since most fields expose raft/rdb internals that are not required for normal + * RDB usage, we will not attempt to explain them fully here. 
+ */ +struct rdb_clue { + /* Raft clue */ + uint64_t bcl_term; /**< term */ + int bcl_vote; /**< vote */ + d_rank_t bcl_self; /**< self rank */ + uint64_t bcl_last_index; /**< index of last entry */ + uint64_t bcl_last_term; /**< term of last entry */ + uint64_t bcl_base_index; /**< index of base (i.e., snapshot) */ + uint64_t bcl_base_term; /**< term of base (i.e., snapshot) */ + d_rank_list_t *bcl_replicas; /**< replicas at last index */ + + /* Database clue */ + uint64_t bcl_oid_next; /**< next OID */ +}; + /** Database storage methods */ int rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t size, const d_rank_list_t *replicas, struct rdb_cbs *cbs, void *arg, @@ -124,6 +145,8 @@ int rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct r void *arg, struct rdb_storage **storagep); void rdb_close(struct rdb_storage *storage); int rdb_destroy(const char *path, const uuid_t uuid); +int rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue); +int rdb_dictate(struct rdb_storage *storage); /** Database (opaque) */ struct rdb; diff --git a/src/include/daos_srv/rsvc.h b/src/include/daos_srv/rsvc.h index aeaee9b94e5..5b8348d12c4 100644 --- a/src/include/daos_srv/rsvc.h +++ b/src/include/daos_srv/rsvc.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -75,10 +75,10 @@ struct ds_rsvc_class { void (*sc_drain)(struct ds_rsvc *svc); /** - * Distribute the system/pool map in the system/pool. This callback is - * optional. + * Distribute the system/pool map in the system/pool and return its + * version. This callback is optional. 
*/ - int (*sc_map_dist)(struct ds_rsvc *svc); + int (*sc_map_dist)(struct ds_rsvc *svc, uint32_t *version); }; void ds_rsvc_class_register(enum ds_rsvc_class_id id, @@ -113,7 +113,9 @@ struct ds_rsvc { ABT_cond s_state_cv; int s_leader_ref; /* on leader state */ ABT_cond s_leader_ref_cv; - bool s_map_dist; /* has a map dist request? */ + bool s_map_dist; /* has a queued map dist request? */ + bool s_map_dist_inp; /* has a in-progress map dist request? */ + uint32_t s_map_dist_ver; /* highest map version distributed */ ABT_cond s_map_dist_cv; ABT_thread s_map_distd; bool s_map_distd_stop; @@ -123,15 +125,22 @@ int ds_rsvc_start_nodb(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid); int ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id); +/** Mode of starting a replicated service */ +enum ds_rsvc_start_mode { + DS_RSVC_START, /**< simply start the service */ + DS_RSVC_CREATE, /**< create and start the service */ + DS_RSVC_DICTATE /**< DANGEROUSLY reset and start the service (see rdb_dictate) */ +}; + int ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t caller_term, - bool create, size_t size, d_rank_list_t *replicas, void *arg); + enum ds_rsvc_start_mode mode, size_t size, d_rank_list_t *replicas, void *arg); int ds_rsvc_stop(enum ds_rsvc_class_id class, d_iov_t *id, uint64_t caller_term, bool destroy); int ds_rsvc_stop_all(enum ds_rsvc_class_id class); int ds_rsvc_stop_leader(enum ds_rsvc_class_id class, d_iov_t *id, struct rsvc_hint *hint); int ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, - const d_rank_list_t *ranks, uint64_t caller_term, bool create, - bool bootstrap, size_t size); + const d_rank_list_t *ranks, uint64_t caller_term, + enum ds_rsvc_start_mode mode, bool bootstrap, size_t size); int ds_rsvc_dist_stop(enum ds_rsvc_class_id class, d_iov_t *id, const d_rank_list_t *ranks, d_rank_list_t *excluded, uint64_t caller_term, bool destroy); enum ds_rsvc_state 
ds_rsvc_get_state(struct ds_rsvc *svc); @@ -167,5 +176,7 @@ int ds_rsvc_list_attr(struct ds_rsvc *svc, struct rdb_tx *tx, rdb_path_t *path, size_t ds_rsvc_get_md_cap(void); void ds_rsvc_request_map_dist(struct ds_rsvc *svc); +void ds_rsvc_query_map_dist(struct ds_rsvc *svc, uint32_t *version, bool *idle); +void ds_rsvc_wait_map_dist(struct ds_rsvc *svc); #endif /* DAOS_SRV_RSVC_H */ diff --git a/src/include/daos_srv/vea.h b/src/include/daos_srv/vea.h index aeed5ce0445..e431345ef2f 100644 --- a/src/include/daos_srv/vea.h +++ b/src/include/daos_srv/vea.h @@ -20,6 +20,13 @@ #include #include +/* Common free extent structure for both SCM & in-memory index */ +struct vea_free_extent { + uint64_t vfe_blk_off; /* Block offset of the extent */ + uint32_t vfe_blk_cnt; /* Total blocks of the extent */ + uint32_t vfe_age; /* Monotonic timestamp */ +}; + /* Reserved extent(s) */ struct vea_resrvd_ext { /* Link to a list for a series of vea_reserve() calls */ @@ -116,6 +123,8 @@ struct vea_space_info; /* Callback to initialize block device header */ typedef int (*vea_format_callback_t)(void *cb_data); +/* Callback for vea free tree enumeration */ +typedef int (*vea_free_callback_t)(void *cb_arg, struct vea_free_extent *vfe); /** * Initialize the space tracking information on SCM and the header of the @@ -334,4 +343,15 @@ void *vea_metrics_alloc(const char *path, int tgt_id); */ int vea_metrics_count(void); +/** + * Enumerate the free extents/regions vea tracks + * + * \param vsi [IN] In-memory compound index + * \param cb [IN] callback function for each entry + * \param cb_arg [IN] callback arg + * + * \return 0 on success, otherwise error code + */ +int vea_enumerate_free(struct vea_space_info *vsi, vea_free_callback_t cb, void *cb_arg); + #endif /* __VEA_API_H__ */ diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index 7e2d6f86e88..bdb2f3aa6d1 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -264,6 +264,9 @@ 
vos_dtx_local_end(struct dtx_handle *dth, int result); int vos_self_init(const char *db_path, bool use_sys_db, int tgt_id); +int +vos_self_init_ext(const char *db_path, bool use_sys_db, int tgt_id, bool nvme_init); + /** * Finalize the environment for a VOS instance * Must be called for clean up at the end of using a vos instance diff --git a/src/include/daos_srv/vos_types.h b/src/include/daos_srv/vos_types.h index b42cdfbac38..4620bdaa436 100644 --- a/src/include/daos_srv/vos_types.h +++ b/src/include/daos_srv/vos_types.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,8 @@ enum vos_pool_open_flags { VOS_POF_RDB = (1 << 4), /** SYS DB pool */ VOS_POF_SYSDB = (1 << 5), + /** Open the pool for daos check query, that will bypass EXEL flags. */ + VOS_POF_FOR_CHECK_QUERY = (1 << 6), }; enum vos_oi_attr { @@ -130,6 +133,17 @@ struct vos_pool_space { #define NVME_FREE(vps) ((vps)->vps_space.s_free[DAOS_MEDIA_NVME]) #define NVME_SYS(vps) ((vps)->vps_space_sys[DAOS_MEDIA_NVME]) +struct chk_pool_info { + /** DAOS check phase on the pool shard. */ + uint32_t cpi_phase; + /** DAOS check instance status on the pool shard. */ + uint32_t cpi_ins_status; + /** Inconsistency information for DAOS check on the pool shard. */ + struct chk_statistics cpi_statistics; + /** Time information for DAOS check on the pool shard. 
*/ + struct chk_time cpi_time; +}; + /** * pool attributes returned to query */ @@ -140,6 +154,8 @@ typedef struct { struct vos_pool_space pif_space; /** garbage collector statistics */ struct vos_gc_stat pif_gc_stat; + /** DAOS check related information */ + struct chk_pool_info pif_chk; /** TODO */ } vos_pool_info_t; diff --git a/src/include/daos_types.h b/src/include/daos_types.h index 0eb16b5eb31..1844e22bc80 100644 --- a/src/include/daos_types.h +++ b/src/include/daos_types.h @@ -18,6 +18,7 @@ extern "C" { #include #include #include +#include /** uuid_t */ #include @@ -143,6 +144,12 @@ typedef struct { typedef d_iov_t daos_key_t; +static inline bool +daos_key_is_null(daos_key_t key) +{ + return key.iov_buf_len == 0 || key.iov_buf == NULL; +} + /** * Event and event queue */ @@ -226,6 +233,29 @@ typedef struct { uint64_t hi; } daos_obj_id_t; +#define DAOS_UUID_STR_SIZE 37 /* 36 + 1 for '\0' */ + +static inline bool +daos_is_valid_uuid_string(const char *uuid) +{ + const char *p; + int len = DAOS_UUID_STR_SIZE - 1; /* Excludes the terminating '\0' */ + int i; + + if (strnlen(uuid, len) != len) + return false; + + for (i = 0, p = uuid; i < len; i++, p++) { + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (*p != '-') + return false; + } else if (!isxdigit(*p)) { + return false; + } + } + + return true; +} /** * Corresponding rank and URI for a DAOS engine */ diff --git a/src/mgmt/SConscript b/src/mgmt/SConscript index 590f332ca54..ed629cb3437 100644 --- a/src/mgmt/SConscript +++ b/src/mgmt/SConscript @@ -1,20 +1,22 @@ +# pylint: disable=consider-using-f-string """Build management server module""" def scons(): """Execute build""" - Import('env', 'prereqs', 'libdaos_tgts') + Import('env', 'prereqs', 'libdaos_tgts', 'chk_pb') env.AppendUnique(LIBPATH=[Dir('.')]) denv = env.Clone() - denv.AppendUnique(CPPPATH=[Dir('.').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('.').srcnode(), Dir('..').srcnode()]) denv.require('protobufc') pb_objs = 
denv.SharedObject(['acl.pb-c.c', 'pool.pb-c.c', 'svc.pb-c.c', - 'smd.pb-c.c', 'cont.pb-c.c', 'server.pb-c.c']) + 'smd.pb-c.c', 'cont.pb-c.c', 'server.pb-c.c', + 'check.pb-c.c']) + chk_pb common = denv.SharedObject(['rpc.c']) + pb_objs # Management client library @@ -32,7 +34,7 @@ def scons(): 'srv_pool.c', 'srv_system.c', 'srv_target.c', 'srv_query.c', 'srv_drpc.c', 'srv_util.c', - 'srv_container.c'], + 'srv_container.c', 'srv_chk.c'], install_off='../..') senv.Install('$PREFIX/lib64/daos_srv', mgmt_srv) diff --git a/src/mgmt/check.pb-c.c b/src/mgmt/check.pb-c.c new file mode 100644 index 00000000000..f1dfd1d7102 --- /dev/null +++ b/src/mgmt/check.pb-c.c @@ -0,0 +1,2176 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! */ +/* Generated from: check.proto */ + +/* Do not generate deprecated warnings for self */ +#ifndef PROTOBUF_C__NO_DEPRECATED +#define PROTOBUF_C__NO_DEPRECATED +#endif + +#include "check.pb-c.h" +void mgmt__check_inconsist_policy__init + (Mgmt__CheckInconsistPolicy *message) +{ + static const Mgmt__CheckInconsistPolicy init_value = MGMT__CHECK_INCONSIST_POLICY__INIT; + *message = init_value; +} +size_t mgmt__check_inconsist_policy__get_packed_size + (const Mgmt__CheckInconsistPolicy *message) +{ + assert(message->base.descriptor == &mgmt__check_inconsist_policy__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_inconsist_policy__pack + (const Mgmt__CheckInconsistPolicy *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_inconsist_policy__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_inconsist_policy__pack_to_buffer + (const Mgmt__CheckInconsistPolicy *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_inconsist_policy__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} 
+Mgmt__CheckInconsistPolicy * + mgmt__check_inconsist_policy__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckInconsistPolicy *) + protobuf_c_message_unpack (&mgmt__check_inconsist_policy__descriptor, + allocator, len, data); +} +void mgmt__check_inconsist_policy__free_unpacked + (Mgmt__CheckInconsistPolicy *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_inconsist_policy__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_enable_req__init + (Mgmt__CheckEnableReq *message) +{ + static const Mgmt__CheckEnableReq init_value = MGMT__CHECK_ENABLE_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_enable_req__get_packed_size + (const Mgmt__CheckEnableReq *message) +{ + assert(message->base.descriptor == &mgmt__check_enable_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_enable_req__pack + (const Mgmt__CheckEnableReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_enable_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_enable_req__pack_to_buffer + (const Mgmt__CheckEnableReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_enable_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckEnableReq * + mgmt__check_enable_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckEnableReq *) + protobuf_c_message_unpack (&mgmt__check_enable_req__descriptor, + allocator, len, data); +} +void mgmt__check_enable_req__free_unpacked + (Mgmt__CheckEnableReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == 
&mgmt__check_enable_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_disable_req__init + (Mgmt__CheckDisableReq *message) +{ + static const Mgmt__CheckDisableReq init_value = MGMT__CHECK_DISABLE_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_disable_req__get_packed_size + (const Mgmt__CheckDisableReq *message) +{ + assert(message->base.descriptor == &mgmt__check_disable_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_disable_req__pack + (const Mgmt__CheckDisableReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_disable_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_disable_req__pack_to_buffer + (const Mgmt__CheckDisableReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_disable_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckDisableReq * + mgmt__check_disable_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckDisableReq *) + protobuf_c_message_unpack (&mgmt__check_disable_req__descriptor, + allocator, len, data); +} +void mgmt__check_disable_req__free_unpacked + (Mgmt__CheckDisableReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_disable_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_start_req__init + (Mgmt__CheckStartReq *message) +{ + static const Mgmt__CheckStartReq init_value = MGMT__CHECK_START_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_start_req__get_packed_size + (const Mgmt__CheckStartReq *message) +{ + assert(message->base.descriptor == &mgmt__check_start_req__descriptor); + return 
protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_start_req__pack + (const Mgmt__CheckStartReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_start_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_start_req__pack_to_buffer + (const Mgmt__CheckStartReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_start_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckStartReq * + mgmt__check_start_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckStartReq *) + protobuf_c_message_unpack (&mgmt__check_start_req__descriptor, + allocator, len, data); +} +void mgmt__check_start_req__free_unpacked + (Mgmt__CheckStartReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_start_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_start_resp__init + (Mgmt__CheckStartResp *message) +{ + static const Mgmt__CheckStartResp init_value = MGMT__CHECK_START_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_start_resp__get_packed_size + (const Mgmt__CheckStartResp *message) +{ + assert(message->base.descriptor == &mgmt__check_start_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_start_resp__pack + (const Mgmt__CheckStartResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_start_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_start_resp__pack_to_buffer + (const Mgmt__CheckStartResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == 
&mgmt__check_start_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckStartResp * + mgmt__check_start_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckStartResp *) + protobuf_c_message_unpack (&mgmt__check_start_resp__descriptor, + allocator, len, data); +} +void mgmt__check_start_resp__free_unpacked + (Mgmt__CheckStartResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_start_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_stop_req__init + (Mgmt__CheckStopReq *message) +{ + static const Mgmt__CheckStopReq init_value = MGMT__CHECK_STOP_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_stop_req__get_packed_size + (const Mgmt__CheckStopReq *message) +{ + assert(message->base.descriptor == &mgmt__check_stop_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_stop_req__pack + (const Mgmt__CheckStopReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_stop_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_stop_req__pack_to_buffer + (const Mgmt__CheckStopReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_stop_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckStopReq * + mgmt__check_stop_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckStopReq *) + protobuf_c_message_unpack (&mgmt__check_stop_req__descriptor, + allocator, len, data); +} +void mgmt__check_stop_req__free_unpacked + (Mgmt__CheckStopReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; 
+ assert(message->base.descriptor == &mgmt__check_stop_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_stop_resp__init + (Mgmt__CheckStopResp *message) +{ + static const Mgmt__CheckStopResp init_value = MGMT__CHECK_STOP_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_stop_resp__get_packed_size + (const Mgmt__CheckStopResp *message) +{ + assert(message->base.descriptor == &mgmt__check_stop_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_stop_resp__pack + (const Mgmt__CheckStopResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_stop_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_stop_resp__pack_to_buffer + (const Mgmt__CheckStopResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_stop_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckStopResp * + mgmt__check_stop_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckStopResp *) + protobuf_c_message_unpack (&mgmt__check_stop_resp__descriptor, + allocator, len, data); +} +void mgmt__check_stop_resp__free_unpacked + (Mgmt__CheckStopResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_stop_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_req__init + (Mgmt__CheckQueryReq *message) +{ + static const Mgmt__CheckQueryReq init_value = MGMT__CHECK_QUERY_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_query_req__get_packed_size + (const Mgmt__CheckQueryReq *message) +{ + assert(message->base.descriptor == &mgmt__check_query_req__descriptor); + return 
protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_req__pack + (const Mgmt__CheckQueryReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_req__pack_to_buffer + (const Mgmt__CheckQueryReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_query_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryReq * + mgmt__check_query_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryReq *) + protobuf_c_message_unpack (&mgmt__check_query_req__descriptor, + allocator, len, data); +} +void mgmt__check_query_req__free_unpacked + (Mgmt__CheckQueryReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_time__init + (Mgmt__CheckQueryTime *message) +{ + static const Mgmt__CheckQueryTime init_value = MGMT__CHECK_QUERY_TIME__INIT; + *message = init_value; +} +size_t mgmt__check_query_time__get_packed_size + (const Mgmt__CheckQueryTime *message) +{ + assert(message->base.descriptor == &mgmt__check_query_time__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_time__pack + (const Mgmt__CheckQueryTime *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_time__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_time__pack_to_buffer + (const Mgmt__CheckQueryTime *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == 
&mgmt__check_query_time__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryTime * + mgmt__check_query_time__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryTime *) + protobuf_c_message_unpack (&mgmt__check_query_time__descriptor, + allocator, len, data); +} +void mgmt__check_query_time__free_unpacked + (Mgmt__CheckQueryTime *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_time__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_inconsist__init + (Mgmt__CheckQueryInconsist *message) +{ + static const Mgmt__CheckQueryInconsist init_value = MGMT__CHECK_QUERY_INCONSIST__INIT; + *message = init_value; +} +size_t mgmt__check_query_inconsist__get_packed_size + (const Mgmt__CheckQueryInconsist *message) +{ + assert(message->base.descriptor == &mgmt__check_query_inconsist__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_inconsist__pack + (const Mgmt__CheckQueryInconsist *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_inconsist__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_inconsist__pack_to_buffer + (const Mgmt__CheckQueryInconsist *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_query_inconsist__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryInconsist * + mgmt__check_query_inconsist__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryInconsist *) + protobuf_c_message_unpack (&mgmt__check_query_inconsist__descriptor, + allocator, len, data); +} +void 
mgmt__check_query_inconsist__free_unpacked + (Mgmt__CheckQueryInconsist *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_inconsist__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_target__init + (Mgmt__CheckQueryTarget *message) +{ + static const Mgmt__CheckQueryTarget init_value = MGMT__CHECK_QUERY_TARGET__INIT; + *message = init_value; +} +size_t mgmt__check_query_target__get_packed_size + (const Mgmt__CheckQueryTarget *message) +{ + assert(message->base.descriptor == &mgmt__check_query_target__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_target__pack + (const Mgmt__CheckQueryTarget *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_target__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_target__pack_to_buffer + (const Mgmt__CheckQueryTarget *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_query_target__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryTarget * + mgmt__check_query_target__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryTarget *) + protobuf_c_message_unpack (&mgmt__check_query_target__descriptor, + allocator, len, data); +} +void mgmt__check_query_target__free_unpacked + (Mgmt__CheckQueryTarget *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_target__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_pool__init + (Mgmt__CheckQueryPool *message) +{ + static const Mgmt__CheckQueryPool init_value = 
MGMT__CHECK_QUERY_POOL__INIT; + *message = init_value; +} +size_t mgmt__check_query_pool__get_packed_size + (const Mgmt__CheckQueryPool *message) +{ + assert(message->base.descriptor == &mgmt__check_query_pool__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_pool__pack + (const Mgmt__CheckQueryPool *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_pool__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_pool__pack_to_buffer + (const Mgmt__CheckQueryPool *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_query_pool__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryPool * + mgmt__check_query_pool__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryPool *) + protobuf_c_message_unpack (&mgmt__check_query_pool__descriptor, + allocator, len, data); +} +void mgmt__check_query_pool__free_unpacked + (Mgmt__CheckQueryPool *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_pool__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_query_resp__init + (Mgmt__CheckQueryResp *message) +{ + static const Mgmt__CheckQueryResp init_value = MGMT__CHECK_QUERY_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_query_resp__get_packed_size + (const Mgmt__CheckQueryResp *message) +{ + assert(message->base.descriptor == &mgmt__check_query_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_query_resp__pack + (const Mgmt__CheckQueryResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_query_resp__descriptor); + 
return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_query_resp__pack_to_buffer + (const Mgmt__CheckQueryResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_query_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckQueryResp * + mgmt__check_query_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckQueryResp *) + protobuf_c_message_unpack (&mgmt__check_query_resp__descriptor, + allocator, len, data); +} +void mgmt__check_query_resp__free_unpacked + (Mgmt__CheckQueryResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_query_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_set_policy_req__init + (Mgmt__CheckSetPolicyReq *message) +{ + static const Mgmt__CheckSetPolicyReq init_value = MGMT__CHECK_SET_POLICY_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_set_policy_req__get_packed_size + (const Mgmt__CheckSetPolicyReq *message) +{ + assert(message->base.descriptor == &mgmt__check_set_policy_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_set_policy_req__pack + (const Mgmt__CheckSetPolicyReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_set_policy_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_set_policy_req__pack_to_buffer + (const Mgmt__CheckSetPolicyReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_set_policy_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckSetPolicyReq * + mgmt__check_set_policy_req__unpack + 
(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckSetPolicyReq *) + protobuf_c_message_unpack (&mgmt__check_set_policy_req__descriptor, + allocator, len, data); +} +void mgmt__check_set_policy_req__free_unpacked + (Mgmt__CheckSetPolicyReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_set_policy_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_prop_req__init + (Mgmt__CheckPropReq *message) +{ + static const Mgmt__CheckPropReq init_value = MGMT__CHECK_PROP_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_prop_req__get_packed_size + (const Mgmt__CheckPropReq *message) +{ + assert(message->base.descriptor == &mgmt__check_prop_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_prop_req__pack + (const Mgmt__CheckPropReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_prop_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_prop_req__pack_to_buffer + (const Mgmt__CheckPropReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_prop_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckPropReq * + mgmt__check_prop_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckPropReq *) + protobuf_c_message_unpack (&mgmt__check_prop_req__descriptor, + allocator, len, data); +} +void mgmt__check_prop_req__free_unpacked + (Mgmt__CheckPropReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_prop_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void 
mgmt__check_prop_resp__init + (Mgmt__CheckPropResp *message) +{ + static const Mgmt__CheckPropResp init_value = MGMT__CHECK_PROP_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_prop_resp__get_packed_size + (const Mgmt__CheckPropResp *message) +{ + assert(message->base.descriptor == &mgmt__check_prop_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_prop_resp__pack + (const Mgmt__CheckPropResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_prop_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_prop_resp__pack_to_buffer + (const Mgmt__CheckPropResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_prop_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckPropResp * + mgmt__check_prop_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckPropResp *) + protobuf_c_message_unpack (&mgmt__check_prop_resp__descriptor, + allocator, len, data); +} +void mgmt__check_prop_resp__free_unpacked + (Mgmt__CheckPropResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_prop_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_get_policy_req__init + (Mgmt__CheckGetPolicyReq *message) +{ + static const Mgmt__CheckGetPolicyReq init_value = MGMT__CHECK_GET_POLICY_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_get_policy_req__get_packed_size + (const Mgmt__CheckGetPolicyReq *message) +{ + assert(message->base.descriptor == &mgmt__check_get_policy_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_get_policy_req__pack + (const 
Mgmt__CheckGetPolicyReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_get_policy_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_get_policy_req__pack_to_buffer + (const Mgmt__CheckGetPolicyReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_get_policy_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckGetPolicyReq * + mgmt__check_get_policy_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckGetPolicyReq *) + protobuf_c_message_unpack (&mgmt__check_get_policy_req__descriptor, + allocator, len, data); +} +void mgmt__check_get_policy_req__free_unpacked + (Mgmt__CheckGetPolicyReq *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_get_policy_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_get_policy_resp__init + (Mgmt__CheckGetPolicyResp *message) +{ + static const Mgmt__CheckGetPolicyResp init_value = MGMT__CHECK_GET_POLICY_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_get_policy_resp__get_packed_size + (const Mgmt__CheckGetPolicyResp *message) +{ + assert(message->base.descriptor == &mgmt__check_get_policy_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_get_policy_resp__pack + (const Mgmt__CheckGetPolicyResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_get_policy_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_get_policy_resp__pack_to_buffer + (const Mgmt__CheckGetPolicyResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == 
&mgmt__check_get_policy_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckGetPolicyResp * + mgmt__check_get_policy_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckGetPolicyResp *) + protobuf_c_message_unpack (&mgmt__check_get_policy_resp__descriptor, + allocator, len, data); +} +void mgmt__check_get_policy_resp__free_unpacked + (Mgmt__CheckGetPolicyResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_get_policy_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_act_req__init + (Mgmt__CheckActReq *message) +{ + static const Mgmt__CheckActReq init_value = MGMT__CHECK_ACT_REQ__INIT; + *message = init_value; +} +size_t mgmt__check_act_req__get_packed_size + (const Mgmt__CheckActReq *message) +{ + assert(message->base.descriptor == &mgmt__check_act_req__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_act_req__pack + (const Mgmt__CheckActReq *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_act_req__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_act_req__pack_to_buffer + (const Mgmt__CheckActReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_act_req__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckActReq * + mgmt__check_act_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckActReq *) + protobuf_c_message_unpack (&mgmt__check_act_req__descriptor, + allocator, len, data); +} +void mgmt__check_act_req__free_unpacked + (Mgmt__CheckActReq *message, + ProtobufCAllocator *allocator) +{ + 
if(!message) + return; + assert(message->base.descriptor == &mgmt__check_act_req__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void mgmt__check_act_resp__init + (Mgmt__CheckActResp *message) +{ + static const Mgmt__CheckActResp init_value = MGMT__CHECK_ACT_RESP__INIT; + *message = init_value; +} +size_t mgmt__check_act_resp__get_packed_size + (const Mgmt__CheckActResp *message) +{ + assert(message->base.descriptor == &mgmt__check_act_resp__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t mgmt__check_act_resp__pack + (const Mgmt__CheckActResp *message, + uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__check_act_resp__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t mgmt__check_act_resp__pack_to_buffer + (const Mgmt__CheckActResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__check_act_resp__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Mgmt__CheckActResp * + mgmt__check_act_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Mgmt__CheckActResp *) + protobuf_c_message_unpack (&mgmt__check_act_resp__descriptor, + allocator, len, data); +} +void mgmt__check_act_resp__free_unpacked + (Mgmt__CheckActResp *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &mgmt__check_act_resp__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +static const ProtobufCFieldDescriptor mgmt__check_inconsist_policy__field_descriptors[2] = +{ + { + "inconsist_cas", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckInconsistPolicy, inconsist_cas), + &chk__check_inconsist_class__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* 
reserved1,reserved2, etc */ + }, + { + "inconsist_act", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckInconsistPolicy, inconsist_act), + &chk__check_inconsist_action__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_inconsist_policy__field_indices_by_name[] = { + 1, /* field[1] = inconsist_act */ + 0, /* field[0] = inconsist_cas */ +}; +static const ProtobufCIntRange mgmt__check_inconsist_policy__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor mgmt__check_inconsist_policy__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckInconsistPolicy", + "CheckInconsistPolicy", + "Mgmt__CheckInconsistPolicy", + "mgmt", + sizeof(Mgmt__CheckInconsistPolicy), + 2, + mgmt__check_inconsist_policy__field_descriptors, + mgmt__check_inconsist_policy__field_indices_by_name, + 1, mgmt__check_inconsist_policy__number_ranges, + (ProtobufCMessageInit) mgmt__check_inconsist_policy__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_enable_req__field_descriptors[1] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckEnableReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_enable_req__field_indices_by_name[] = { + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_enable_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_enable_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckEnableReq", + "CheckEnableReq", + "Mgmt__CheckEnableReq", + "mgmt", + sizeof(Mgmt__CheckEnableReq), + 1, + mgmt__check_enable_req__field_descriptors, + mgmt__check_enable_req__field_indices_by_name, + 1, 
mgmt__check_enable_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_enable_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_disable_req__field_descriptors[1] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckDisableReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_disable_req__field_indices_by_name[] = { + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_disable_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_disable_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckDisableReq", + "CheckDisableReq", + "Mgmt__CheckDisableReq", + "mgmt", + sizeof(Mgmt__CheckDisableReq), + 1, + mgmt__check_disable_req__field_descriptors, + mgmt__check_disable_req__field_indices_by_name, + 1, mgmt__check_disable_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_disable_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_start_req__field_descriptors[5] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckStartReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "flags", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckStartReq, flags), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ranks", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Mgmt__CheckStartReq, n_ranks), + offsetof(Mgmt__CheckStartReq, ranks), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "uuids", + 4, + PROTOBUF_C_LABEL_REPEATED, 
+ PROTOBUF_C_TYPE_STRING, + offsetof(Mgmt__CheckStartReq, n_uuids), + offsetof(Mgmt__CheckStartReq, uuids), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "policies", + 5, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckStartReq, n_policies), + offsetof(Mgmt__CheckStartReq, policies), + &mgmt__check_inconsist_policy__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_start_req__field_indices_by_name[] = { + 1, /* field[1] = flags */ + 4, /* field[4] = policies */ + 2, /* field[2] = ranks */ + 0, /* field[0] = sys */ + 3, /* field[3] = uuids */ +}; +static const ProtobufCIntRange mgmt__check_start_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 5 } +}; +const ProtobufCMessageDescriptor mgmt__check_start_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckStartReq", + "CheckStartReq", + "Mgmt__CheckStartReq", + "mgmt", + sizeof(Mgmt__CheckStartReq), + 5, + mgmt__check_start_req__field_descriptors, + mgmt__check_start_req__field_indices_by_name, + 1, mgmt__check_start_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_start_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_start_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckStartResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_start_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange mgmt__check_start_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_start_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckStartResp", + "CheckStartResp", + 
"Mgmt__CheckStartResp", + "mgmt", + sizeof(Mgmt__CheckStartResp), + 1, + mgmt__check_start_resp__field_descriptors, + mgmt__check_start_resp__field_indices_by_name, + 1, mgmt__check_start_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_start_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_stop_req__field_descriptors[2] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckStopReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "uuids", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Mgmt__CheckStopReq, n_uuids), + offsetof(Mgmt__CheckStopReq, uuids), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_stop_req__field_indices_by_name[] = { + 0, /* field[0] = sys */ + 1, /* field[1] = uuids */ +}; +static const ProtobufCIntRange mgmt__check_stop_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor mgmt__check_stop_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckStopReq", + "CheckStopReq", + "Mgmt__CheckStopReq", + "mgmt", + sizeof(Mgmt__CheckStopReq), + 2, + mgmt__check_stop_req__field_descriptors, + mgmt__check_stop_req__field_indices_by_name, + 1, mgmt__check_stop_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_stop_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_stop_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckStopResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_stop_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ 
+}; +static const ProtobufCIntRange mgmt__check_stop_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_stop_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckStopResp", + "CheckStopResp", + "Mgmt__CheckStopResp", + "mgmt", + sizeof(Mgmt__CheckStopResp), + 1, + mgmt__check_stop_resp__field_descriptors, + mgmt__check_stop_resp__field_indices_by_name, + 1, mgmt__check_stop_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_stop_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_req__field_descriptors[4] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "uuids", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Mgmt__CheckQueryReq, n_uuids), + offsetof(Mgmt__CheckQueryReq, uuids), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "shallow", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryReq, shallow), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "seqs", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT64, + offsetof(Mgmt__CheckQueryReq, n_seqs), + offsetof(Mgmt__CheckQueryReq, seqs), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_req__field_indices_by_name[] = { + 3, /* field[3] = seqs */ + 2, /* field[2] = shallow */ + 0, /* field[0] = sys */ + 1, /* field[1] = uuids */ +}; +static const ProtobufCIntRange mgmt__check_query_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_req__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryReq", + "CheckQueryReq", + "Mgmt__CheckQueryReq", + "mgmt", + sizeof(Mgmt__CheckQueryReq), + 4, + mgmt__check_query_req__field_descriptors, + mgmt__check_query_req__field_indices_by_name, + 1, mgmt__check_query_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_time__field_descriptors[2] = +{ + { + "start_time", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTime, start_time), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "misc_time", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTime, misc_time), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_time__field_indices_by_name[] = { + 1, /* field[1] = misc_time */ + 0, /* field[0] = start_time */ +}; +static const ProtobufCIntRange mgmt__check_query_time__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_time__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryTime", + "CheckQueryTime", + "Mgmt__CheckQueryTime", + "mgmt", + sizeof(Mgmt__CheckQueryTime), + 2, + mgmt__check_query_time__field_descriptors, + mgmt__check_query_time__field_indices_by_name, + 1, mgmt__check_query_time__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_time__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_inconsist__field_descriptors[4] = +{ + { + "total", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryInconsist, total), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + 
"repaired", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryInconsist, repaired), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ignored", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryInconsist, ignored), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "failed", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryInconsist, failed), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_inconsist__field_indices_by_name[] = { + 3, /* field[3] = failed */ + 2, /* field[2] = ignored */ + 1, /* field[1] = repaired */ + 0, /* field[0] = total */ +}; +static const ProtobufCIntRange mgmt__check_query_inconsist__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_inconsist__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryInconsist", + "CheckQueryInconsist", + "Mgmt__CheckQueryInconsist", + "mgmt", + sizeof(Mgmt__CheckQueryInconsist), + 4, + mgmt__check_query_inconsist__field_descriptors, + mgmt__check_query_inconsist__field_indices_by_name, + 1, mgmt__check_query_inconsist__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_inconsist__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_target__field_descriptors[5] = +{ + { + "rank", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTarget, rank), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "target", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTarget, target), + NULL, + NULL, 
+ 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "status", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTarget, status), + &chk__check_inst_status__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "inconsistency", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTarget, inconsistency), + &mgmt__check_query_inconsist__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "time", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryTarget, time), + &mgmt__check_query_time__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_target__field_indices_by_name[] = { + 3, /* field[3] = inconsistency */ + 0, /* field[0] = rank */ + 2, /* field[2] = status */ + 1, /* field[1] = target */ + 4, /* field[4] = time */ +}; +static const ProtobufCIntRange mgmt__check_query_target__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 5 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_target__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryTarget", + "CheckQueryTarget", + "Mgmt__CheckQueryTarget", + "mgmt", + sizeof(Mgmt__CheckQueryTarget), + 5, + mgmt__check_query_target__field_descriptors, + mgmt__check_query_target__field_indices_by_name, + 1, mgmt__check_query_target__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_target__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_pool__field_descriptors[6] = +{ + { + "uuid", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryPool, uuid), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* 
reserved1,reserved2, etc */ + }, + { + "status", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryPool, status), + &chk__check_pool_status__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "phase", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryPool, phase), + &chk__check_scan_phase__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "inconsistency", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryPool, inconsistency), + &mgmt__check_query_inconsist__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "time", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryPool, time), + &mgmt__check_query_time__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "targets", + 6, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckQueryPool, n_targets), + offsetof(Mgmt__CheckQueryPool, targets), + &mgmt__check_query_target__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_pool__field_indices_by_name[] = { + 3, /* field[3] = inconsistency */ + 2, /* field[2] = phase */ + 1, /* field[1] = status */ + 5, /* field[5] = targets */ + 4, /* field[4] = time */ + 0, /* field[0] = uuid */ +}; +static const ProtobufCIntRange mgmt__check_query_pool__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 6 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_pool__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryPool", + "CheckQueryPool", + "Mgmt__CheckQueryPool", + "mgmt", + sizeof(Mgmt__CheckQueryPool), + 6, + mgmt__check_query_pool__field_descriptors, 
+ mgmt__check_query_pool__field_indices_by_name, + 1, mgmt__check_query_pool__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_pool__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_query_resp__field_descriptors[7] = +{ + { + "req_status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryResp, req_status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ins_status", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryResp, ins_status), + &chk__check_inst_status__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ins_phase", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryResp, ins_phase), + &chk__check_scan_phase__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "inconsistency", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryResp, inconsistency), + &mgmt__check_query_inconsist__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "time", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckQueryResp, time), + &mgmt__check_query_time__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "pools", + 6, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckQueryResp, n_pools), + offsetof(Mgmt__CheckQueryResp, pools), + &mgmt__check_query_pool__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "reports", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckQueryResp, n_reports), + offsetof(Mgmt__CheckQueryResp, 
reports), + &chk__check_report__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_query_resp__field_indices_by_name[] = { + 3, /* field[3] = inconsistency */ + 2, /* field[2] = ins_phase */ + 1, /* field[1] = ins_status */ + 5, /* field[5] = pools */ + 6, /* field[6] = reports */ + 0, /* field[0] = req_status */ + 4, /* field[4] = time */ +}; +static const ProtobufCIntRange mgmt__check_query_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 7 } +}; +const ProtobufCMessageDescriptor mgmt__check_query_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckQueryResp", + "CheckQueryResp", + "Mgmt__CheckQueryResp", + "mgmt", + sizeof(Mgmt__CheckQueryResp), + 7, + mgmt__check_query_resp__field_descriptors, + mgmt__check_query_resp__field_indices_by_name, + 1, mgmt__check_query_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_query_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_set_policy_req__field_descriptors[3] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckSetPolicyReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "flags", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckSetPolicyReq, flags), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "policies", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckSetPolicyReq, n_policies), + offsetof(Mgmt__CheckSetPolicyReq, policies), + &mgmt__check_inconsist_policy__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_set_policy_req__field_indices_by_name[] = { + 1, /* field[1] = flags */ + 2, /* field[2] = policies */ + 
0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_set_policy_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const ProtobufCMessageDescriptor mgmt__check_set_policy_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckSetPolicyReq", + "CheckSetPolicyReq", + "Mgmt__CheckSetPolicyReq", + "mgmt", + sizeof(Mgmt__CheckSetPolicyReq), + 3, + mgmt__check_set_policy_req__field_descriptors, + mgmt__check_set_policy_req__field_indices_by_name, + 1, mgmt__check_set_policy_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_set_policy_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_prop_req__field_descriptors[1] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckPropReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_prop_req__field_indices_by_name[] = { + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_prop_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_prop_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckPropReq", + "CheckPropReq", + "Mgmt__CheckPropReq", + "mgmt", + sizeof(Mgmt__CheckPropReq), + 1, + mgmt__check_prop_req__field_descriptors, + mgmt__check_prop_req__field_indices_by_name, + 1, mgmt__check_prop_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_prop_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_prop_resp__field_descriptors[3] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckPropResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "flags", + 2, + PROTOBUF_C_LABEL_NONE, + 
PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckPropResp, flags), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "policies", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckPropResp, n_policies), + offsetof(Mgmt__CheckPropResp, policies), + &mgmt__check_inconsist_policy__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_prop_resp__field_indices_by_name[] = { + 1, /* field[1] = flags */ + 2, /* field[2] = policies */ + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange mgmt__check_prop_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const ProtobufCMessageDescriptor mgmt__check_prop_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckPropResp", + "CheckPropResp", + "Mgmt__CheckPropResp", + "mgmt", + sizeof(Mgmt__CheckPropResp), + 3, + mgmt__check_prop_resp__field_descriptors, + mgmt__check_prop_resp__field_indices_by_name, + 1, mgmt__check_prop_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_prop_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_get_policy_req__field_descriptors[3] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckGetPolicyReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "classes", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_ENUM, + offsetof(Mgmt__CheckGetPolicyReq, n_classes), + offsetof(Mgmt__CheckGetPolicyReq, classes), + &chk__check_inconsist_class__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "last_used", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckGetPolicyReq, last_used), + NULL, + NULL, + 0, /* 
flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_get_policy_req__field_indices_by_name[] = { + 1, /* field[1] = classes */ + 2, /* field[2] = last_used */ + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_get_policy_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const ProtobufCMessageDescriptor mgmt__check_get_policy_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckGetPolicyReq", + "CheckGetPolicyReq", + "Mgmt__CheckGetPolicyReq", + "mgmt", + sizeof(Mgmt__CheckGetPolicyReq), + 3, + mgmt__check_get_policy_req__field_descriptors, + mgmt__check_get_policy_req__field_indices_by_name, + 1, mgmt__check_get_policy_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_get_policy_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_get_policy_resp__field_descriptors[3] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckGetPolicyResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "flags", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckGetPolicyResp, flags), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "policies", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__CheckGetPolicyResp, n_policies), + offsetof(Mgmt__CheckGetPolicyResp, policies), + &mgmt__check_inconsist_policy__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_get_policy_resp__field_indices_by_name[] = { + 1, /* field[1] = flags */ + 2, /* field[2] = policies */ + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange mgmt__check_get_policy_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const 
ProtobufCMessageDescriptor mgmt__check_get_policy_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckGetPolicyResp", + "CheckGetPolicyResp", + "Mgmt__CheckGetPolicyResp", + "mgmt", + sizeof(Mgmt__CheckGetPolicyResp), + 3, + mgmt__check_get_policy_resp__field_descriptors, + mgmt__check_get_policy_resp__field_indices_by_name, + 1, mgmt__check_get_policy_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_get_policy_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_act_req__field_descriptors[4] = +{ + { + "sys", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckActReq, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "seq", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckActReq, seq), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "act", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckActReq, act), + &chk__check_inconsist_action__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "for_all", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckActReq, for_all), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_act_req__field_indices_by_name[] = { + 2, /* field[2] = act */ + 3, /* field[3] = for_all */ + 1, /* field[1] = seq */ + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__check_act_req__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor mgmt__check_act_req__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckActReq", + "CheckActReq", + "Mgmt__CheckActReq", + "mgmt", + 
sizeof(Mgmt__CheckActReq), + 4, + mgmt__check_act_req__field_descriptors, + mgmt__check_act_req__field_indices_by_name, + 1, mgmt__check_act_req__number_ranges, + (ProtobufCMessageInit) mgmt__check_act_req__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__check_act_resp__field_descriptors[1] = +{ + { + "status", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__CheckActResp, status), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__check_act_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange mgmt__check_act_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor mgmt__check_act_resp__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.CheckActResp", + "CheckActResp", + "Mgmt__CheckActResp", + "mgmt", + sizeof(Mgmt__CheckActResp), + 1, + mgmt__check_act_resp__field_descriptors, + mgmt__check_act_resp__field_indices_by_name, + 1, mgmt__check_act_resp__number_ranges, + (ProtobufCMessageInit) mgmt__check_act_resp__init, + NULL,NULL,NULL /* reserved[123] */ +}; diff --git a/src/mgmt/check.pb-c.h b/src/mgmt/check.pb-c.h new file mode 100644 index 00000000000..2d756017592 --- /dev/null +++ b/src/mgmt/check.pb-c.h @@ -0,0 +1,1014 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! */ +/* Generated from: check.proto */ + +#ifndef PROTOBUF_C_check_2eproto__INCLUDED +#define PROTOBUF_C_check_2eproto__INCLUDED + +#include + +PROTOBUF_C__BEGIN_DECLS + +#if PROTOBUF_C_VERSION_NUMBER < 1003000 +# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. +#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION +# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. 
Please regenerate this file with a newer version of protoc-c. +#endif + +#include "chk/chk.pb-c.h" + +typedef struct _Mgmt__CheckInconsistPolicy Mgmt__CheckInconsistPolicy; +typedef struct _Mgmt__CheckEnableReq Mgmt__CheckEnableReq; +typedef struct _Mgmt__CheckDisableReq Mgmt__CheckDisableReq; +typedef struct _Mgmt__CheckStartReq Mgmt__CheckStartReq; +typedef struct _Mgmt__CheckStartResp Mgmt__CheckStartResp; +typedef struct _Mgmt__CheckStopReq Mgmt__CheckStopReq; +typedef struct _Mgmt__CheckStopResp Mgmt__CheckStopResp; +typedef struct _Mgmt__CheckQueryReq Mgmt__CheckQueryReq; +typedef struct _Mgmt__CheckQueryTime Mgmt__CheckQueryTime; +typedef struct _Mgmt__CheckQueryInconsist Mgmt__CheckQueryInconsist; +typedef struct _Mgmt__CheckQueryTarget Mgmt__CheckQueryTarget; +typedef struct _Mgmt__CheckQueryPool Mgmt__CheckQueryPool; +typedef struct _Mgmt__CheckQueryResp Mgmt__CheckQueryResp; +typedef struct _Mgmt__CheckSetPolicyReq Mgmt__CheckSetPolicyReq; +typedef struct _Mgmt__CheckPropReq Mgmt__CheckPropReq; +typedef struct _Mgmt__CheckPropResp Mgmt__CheckPropResp; +typedef struct _Mgmt__CheckGetPolicyReq Mgmt__CheckGetPolicyReq; +typedef struct _Mgmt__CheckGetPolicyResp Mgmt__CheckGetPolicyResp; +typedef struct _Mgmt__CheckActReq Mgmt__CheckActReq; +typedef struct _Mgmt__CheckActResp Mgmt__CheckActResp; + + +/* --- enums --- */ + + +/* --- messages --- */ + +/* + * The pairs for kinds of inconsistency and related repair action. The control plane need to + * generate such policy array from some configuration file either via command line option or + * some default location, such as /etc/daos/daos_check.yml. Such policy arrge will be passed + * to DAOS engine when start check and cannot changed during check scanning, but can be list + * via 'dmg check prop' - see CheckPropResp. + */ +struct _Mgmt__CheckInconsistPolicy +{ + ProtobufCMessage base; + /* + * See CheckInconsistClass. + */ + Chk__CheckInconsistClass inconsist_cas; + /* + * See CheckInconsistAction. 
+ */ + Chk__CheckInconsistAction inconsist_act; +}; +#define MGMT__CHECK_INCONSIST_POLICY__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_inconsist_policy__descriptor) \ + , CHK__CHECK_INCONSIST_CLASS__CIC_NONE, CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT } + + +struct _Mgmt__CheckEnableReq +{ + ProtobufCMessage base; + char *sys; +}; +#define MGMT__CHECK_ENABLE_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_enable_req__descriptor) \ + , (char *)protobuf_c_empty_string } + + +struct _Mgmt__CheckDisableReq +{ + ProtobufCMessage base; + char *sys; +}; +#define MGMT__CHECK_DISABLE_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_disable_req__descriptor) \ + , (char *)protobuf_c_empty_string } + + +/* + * For 'dmg check start'. + */ +struct _Mgmt__CheckStartReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; + /* + * See CheckFlag. + */ + uint32_t flags; + /* + * The list of ranks to start DAOS check. Cannot be empty. + * The control plane will generate the ranks list and guarantee that any rank in the system + * is either will participate in check or has been excluded. Otherwise, partial ranks check + * may cause some unexpected and unrecoverable result unless the specified pool(s) does not + * exist on those missed rank(s). + */ + size_t n_ranks; + uint32_t *ranks; + /* + * UUID for the pools for which to start DAOS check. + * If empty, then start DAOS check for all pools in the system. + */ + size_t n_uuids; + char **uuids; + /* + * Policy array for handling inconsistency. + */ + size_t n_policies; + Mgmt__CheckInconsistPolicy **policies; +}; +#define MGMT__CHECK_START_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_start_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0, 0,NULL, 0,NULL, 0,NULL } + + +/* + * CheckStartResp returns the result of check start. + */ +struct _Mgmt__CheckStartResp +{ + ProtobufCMessage base; + /* + * DAOS error code. 
+ */ + int32_t status; +}; +#define MGMT__CHECK_START_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_start_resp__descriptor) \ + , 0 } + + +/* + * For 'dmg check stop'. + */ +struct _Mgmt__CheckStopReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; + /* + * UUID for the pools for which to stop DAOS check. + * If empty, then stop check for all pools in the system. + */ + size_t n_uuids; + char **uuids; +}; +#define MGMT__CHECK_STOP_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_stop_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL } + + +/* + * CheckStopResp returns the result of check stop. + */ +struct _Mgmt__CheckStopResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t status; +}; +#define MGMT__CHECK_STOP_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_stop_resp__descriptor) \ + , 0 } + + +/* + * For 'dmg check query'. + */ +struct _Mgmt__CheckQueryReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; + /* + * UUID for the pools for which to query DAOS check. + * If empty, then query DAOS check for all pools in the system. + */ + size_t n_uuids; + char **uuids; + /* + * shallow query (findings only) + */ + protobuf_c_boolean shallow; + /* + * return findings with these sequences (implies shallow) + */ + size_t n_seqs; + uint64_t *seqs; +}; +#define MGMT__CHECK_QUERY_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL, 0, 0,NULL } + + +/* + * Time information on related component: system, pool or target. + */ +struct _Mgmt__CheckQueryTime +{ + ProtobufCMessage base; + /* + * The time of check instance being started on the component. + */ + uint64_t start_time; + /* + * If the check instance is still running on the component, then it is the estimated + * remaining time to complete the check on the component. 
Otherwise, it is the time + * of the check instance completed, failed or stopped on the component. + */ + uint64_t misc_time; +}; +#define MGMT__CHECK_QUERY_TIME__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_time__descriptor) \ + , 0, 0 } + + +/* + * Inconsistency statistics on related component: system, pool or target. + */ +struct _Mgmt__CheckQueryInconsist +{ + ProtobufCMessage base; + /* + * The count of total found inconsistency on the component. + */ + uint32_t total; + /* + * The count of repaired inconsistency on the component. + */ + uint32_t repaired; + /* + * The count of ignored inconsistency on the component. + */ + uint32_t ignored; + /* + * The count of fail to repaired inconsistency on the component. + */ + uint32_t failed; +}; +#define MGMT__CHECK_QUERY_INCONSIST__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_inconsist__descriptor) \ + , 0, 0, 0, 0 } + + +/* + * Check query result for the pool shard on the target. + */ +struct _Mgmt__CheckQueryTarget +{ + ProtobufCMessage base; + /* + * Rank ID. + */ + uint32_t rank; + /* + * Target index in the rank. + */ + uint32_t target; + /* + * Check instance status on this target - see CheckInstStatus. + */ + Chk__CheckInstStatus status; + /* + * Inconsistency statistics during the phases range + * [CSP_DTX_RESYNC, CSP_AGGREGATION] for the pool shard on the target. + */ + Mgmt__CheckQueryInconsist *inconsistency; + /* + * Time information for the pool shard on the target if applicable. + */ + Mgmt__CheckQueryTime *time; +}; +#define MGMT__CHECK_QUERY_TARGET__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_target__descriptor) \ + , 0, 0, CHK__CHECK_INST_STATUS__CIS_INIT, NULL, NULL } + + +/* + * Check query result for the pool. + */ +struct _Mgmt__CheckQueryPool +{ + ProtobufCMessage base; + /* + * Pool UUID. + */ + char *uuid; + /* + * Pool status - see CheckPoolStatus. + */ + Chk__CheckPoolStatus status; + /* + * Scan phase - see CheckScanPhase. 
+ */ + Chk__CheckScanPhase phase; + /* + * Inconsistency statistics during the phases range + * [CSP_POOL_MBS, CSP_CONT_CLEANUP] for the pool. + */ + Mgmt__CheckQueryInconsist *inconsistency; + /* + * Time information for the pool if applicable. + */ + Mgmt__CheckQueryTime *time; + /* + * Per target based query result for the phases since CSP_DTX_RESYNC. + */ + size_t n_targets; + Mgmt__CheckQueryTarget **targets; +}; +#define MGMT__CHECK_QUERY_POOL__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_pool__descriptor) \ + , (char *)protobuf_c_empty_string, CHK__CHECK_POOL_STATUS__CPS_UNCHECKED, CHK__CHECK_SCAN_PHASE__CSP_PREPARE, NULL, NULL, 0,NULL } + + +/* + * CheckQueryResp returns DAOS check status for required pool(s) or the whole system. + * Depend on the dmg command line option, the control plane needs to reorganize the query + * results with summary (of pool shards from targets) and different detailed information. + */ +struct _Mgmt__CheckQueryResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t req_status; + /* + * The whole check instance status depends on the each engine status: + * As long as one target is in CIS_RUNNING, then the instance is CIS_RUNNING. + * Otherwise, in turn with the status of CIS_FAILED, CIS_STOPPED, CIS_IMPLICATED, + * CIS_COMPLETED, CIS_PAUSED, CIS_INIT. + */ + Chk__CheckInstStatus ins_status; + /* + * Scan phase - see CheckScanPhase. Before moving to CSP_POOL_MBS, the check + * instance status is maintained on the check leader. And then multiple pools + * can be processed in parallel, so the instance phase for different pools may + * be different, see CheckQueryPool::phase. + */ + Chk__CheckScanPhase ins_phase; + /* + * Inconsistency statistics during the phases range + * [CSP_PREPARE, CSP_POOL_LIST] for the whole system. + */ + Mgmt__CheckQueryInconsist *inconsistency; + /* + * Time information for the whole system if applicable. 
+ */ + Mgmt__CheckQueryTime *time; + /* + * Per pool based query result for the phases since CSP_POOL_MBS. + */ + size_t n_pools; + Mgmt__CheckQueryPool **pools; + /* + * Inconsistency reports to be displayed + */ + size_t n_reports; + Chk__CheckReport **reports; +}; +#define MGMT__CHECK_QUERY_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_query_resp__descriptor) \ + , 0, CHK__CHECK_INST_STATUS__CIS_INIT, CHK__CHECK_SCAN_PHASE__CSP_PREPARE, NULL, NULL, 0,NULL, 0,NULL } + + +/* + * For 'dmg check set-policy' + */ +struct _Mgmt__CheckSetPolicyReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; + /* + * The flags when start check - see CheckFlag. + */ + uint32_t flags; + /* + * Inconsistency policy array. + */ + size_t n_policies; + Mgmt__CheckInconsistPolicy **policies; +}; +#define MGMT__CHECK_SET_POLICY_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_set_policy_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0, 0,NULL } + + +/* + * To allow daos_server to query check leader properties + */ +struct _Mgmt__CheckPropReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; +}; +#define MGMT__CHECK_PROP_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_prop_req__descriptor) \ + , (char *)protobuf_c_empty_string } + + +/* + * CheckPropResp returns the result of check prop and the properties when start check. + */ +struct _Mgmt__CheckPropResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t status; + /* + * The flags when start check - see CheckFlag. + */ + uint32_t flags; + /* + * Inconsistency policy array. + */ + size_t n_policies; + Mgmt__CheckInconsistPolicy **policies; +}; +#define MGMT__CHECK_PROP_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_prop_resp__descriptor) \ + , 0, 0, 0,NULL } + + +/* + * For 'dmg check get-policy' + */ +struct _Mgmt__CheckGetPolicyReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. 
+ */ + char *sys; + size_t n_classes; + Chk__CheckInconsistClass *classes; + protobuf_c_boolean last_used; +}; +#define MGMT__CHECK_GET_POLICY_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_get_policy_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL, 0 } + + +/* + * CheckGetPolicyResp returns the result of check prop and the properties when start check. + * NB: Dupe of CheckPropResp currently; may consolidate if they don't diverge. + */ +struct _Mgmt__CheckGetPolicyResp +{ + ProtobufCMessage base; + /* + * DAOS error code. + */ + int32_t status; + /* + * The flags when start check - see CheckFlag. + */ + uint32_t flags; + /* + * Inconsistency policy array. + */ + size_t n_policies; + Mgmt__CheckInconsistPolicy **policies; +}; +#define MGMT__CHECK_GET_POLICY_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_get_policy_resp__descriptor) \ + , 0, 0, 0,NULL } + + +/* + * For the admin's decision from DAOS check interaction. + */ +struct _Mgmt__CheckActReq +{ + ProtobufCMessage base; + /* + * DAOS system identifier. + */ + char *sys; + /* + * DAOS RAS event sequence - see RASEvent::extended_info::check_info::chk_inconsist_seq. + */ + uint64_t seq; + /* + * The decision from RASEvent::extended_info::check_info::chk_opts. + */ + Chk__CheckInconsistAction act; + /* + * The same action is applicable to the same type of inconsistency. + */ + protobuf_c_boolean for_all; +}; +#define MGMT__CHECK_ACT_REQ__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_act_req__descriptor) \ + , (char *)protobuf_c_empty_string, 0, CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT, 0 } + + +/* + * CheckActResp returns the result of executing admin's decision. + */ +struct _Mgmt__CheckActResp +{ + ProtobufCMessage base; + /* + * DAOS error code. 
+ */ + int32_t status; +}; +#define MGMT__CHECK_ACT_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__check_act_resp__descriptor) \ + , 0 } + + +/* Mgmt__CheckInconsistPolicy methods */ +void mgmt__check_inconsist_policy__init + (Mgmt__CheckInconsistPolicy *message); +size_t mgmt__check_inconsist_policy__get_packed_size + (const Mgmt__CheckInconsistPolicy *message); +size_t mgmt__check_inconsist_policy__pack + (const Mgmt__CheckInconsistPolicy *message, + uint8_t *out); +size_t mgmt__check_inconsist_policy__pack_to_buffer + (const Mgmt__CheckInconsistPolicy *message, + ProtobufCBuffer *buffer); +Mgmt__CheckInconsistPolicy * + mgmt__check_inconsist_policy__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_inconsist_policy__free_unpacked + (Mgmt__CheckInconsistPolicy *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckEnableReq methods */ +void mgmt__check_enable_req__init + (Mgmt__CheckEnableReq *message); +size_t mgmt__check_enable_req__get_packed_size + (const Mgmt__CheckEnableReq *message); +size_t mgmt__check_enable_req__pack + (const Mgmt__CheckEnableReq *message, + uint8_t *out); +size_t mgmt__check_enable_req__pack_to_buffer + (const Mgmt__CheckEnableReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckEnableReq * + mgmt__check_enable_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_enable_req__free_unpacked + (Mgmt__CheckEnableReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckDisableReq methods */ +void mgmt__check_disable_req__init + (Mgmt__CheckDisableReq *message); +size_t mgmt__check_disable_req__get_packed_size + (const Mgmt__CheckDisableReq *message); +size_t mgmt__check_disable_req__pack + (const Mgmt__CheckDisableReq *message, + uint8_t *out); +size_t mgmt__check_disable_req__pack_to_buffer + (const Mgmt__CheckDisableReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckDisableReq * + mgmt__check_disable_req__unpack + 
(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_disable_req__free_unpacked + (Mgmt__CheckDisableReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckStartReq methods */ +void mgmt__check_start_req__init + (Mgmt__CheckStartReq *message); +size_t mgmt__check_start_req__get_packed_size + (const Mgmt__CheckStartReq *message); +size_t mgmt__check_start_req__pack + (const Mgmt__CheckStartReq *message, + uint8_t *out); +size_t mgmt__check_start_req__pack_to_buffer + (const Mgmt__CheckStartReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckStartReq * + mgmt__check_start_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_start_req__free_unpacked + (Mgmt__CheckStartReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckStartResp methods */ +void mgmt__check_start_resp__init + (Mgmt__CheckStartResp *message); +size_t mgmt__check_start_resp__get_packed_size + (const Mgmt__CheckStartResp *message); +size_t mgmt__check_start_resp__pack + (const Mgmt__CheckStartResp *message, + uint8_t *out); +size_t mgmt__check_start_resp__pack_to_buffer + (const Mgmt__CheckStartResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckStartResp * + mgmt__check_start_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_start_resp__free_unpacked + (Mgmt__CheckStartResp *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckStopReq methods */ +void mgmt__check_stop_req__init + (Mgmt__CheckStopReq *message); +size_t mgmt__check_stop_req__get_packed_size + (const Mgmt__CheckStopReq *message); +size_t mgmt__check_stop_req__pack + (const Mgmt__CheckStopReq *message, + uint8_t *out); +size_t mgmt__check_stop_req__pack_to_buffer + (const Mgmt__CheckStopReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckStopReq * + mgmt__check_stop_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void 
mgmt__check_stop_req__free_unpacked + (Mgmt__CheckStopReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckStopResp methods */ +void mgmt__check_stop_resp__init + (Mgmt__CheckStopResp *message); +size_t mgmt__check_stop_resp__get_packed_size + (const Mgmt__CheckStopResp *message); +size_t mgmt__check_stop_resp__pack + (const Mgmt__CheckStopResp *message, + uint8_t *out); +size_t mgmt__check_stop_resp__pack_to_buffer + (const Mgmt__CheckStopResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckStopResp * + mgmt__check_stop_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_stop_resp__free_unpacked + (Mgmt__CheckStopResp *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckQueryReq methods */ +void mgmt__check_query_req__init + (Mgmt__CheckQueryReq *message); +size_t mgmt__check_query_req__get_packed_size + (const Mgmt__CheckQueryReq *message); +size_t mgmt__check_query_req__pack + (const Mgmt__CheckQueryReq *message, + uint8_t *out); +size_t mgmt__check_query_req__pack_to_buffer + (const Mgmt__CheckQueryReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryReq * + mgmt__check_query_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_req__free_unpacked + (Mgmt__CheckQueryReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckQueryTime methods */ +void mgmt__check_query_time__init + (Mgmt__CheckQueryTime *message); +size_t mgmt__check_query_time__get_packed_size + (const Mgmt__CheckQueryTime *message); +size_t mgmt__check_query_time__pack + (const Mgmt__CheckQueryTime *message, + uint8_t *out); +size_t mgmt__check_query_time__pack_to_buffer + (const Mgmt__CheckQueryTime *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryTime * + mgmt__check_query_time__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_time__free_unpacked + (Mgmt__CheckQueryTime *message, + ProtobufCAllocator 
*allocator); +/* Mgmt__CheckQueryInconsist methods */ +void mgmt__check_query_inconsist__init + (Mgmt__CheckQueryInconsist *message); +size_t mgmt__check_query_inconsist__get_packed_size + (const Mgmt__CheckQueryInconsist *message); +size_t mgmt__check_query_inconsist__pack + (const Mgmt__CheckQueryInconsist *message, + uint8_t *out); +size_t mgmt__check_query_inconsist__pack_to_buffer + (const Mgmt__CheckQueryInconsist *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryInconsist * + mgmt__check_query_inconsist__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_inconsist__free_unpacked + (Mgmt__CheckQueryInconsist *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckQueryTarget methods */ +void mgmt__check_query_target__init + (Mgmt__CheckQueryTarget *message); +size_t mgmt__check_query_target__get_packed_size + (const Mgmt__CheckQueryTarget *message); +size_t mgmt__check_query_target__pack + (const Mgmt__CheckQueryTarget *message, + uint8_t *out); +size_t mgmt__check_query_target__pack_to_buffer + (const Mgmt__CheckQueryTarget *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryTarget * + mgmt__check_query_target__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_target__free_unpacked + (Mgmt__CheckQueryTarget *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckQueryPool methods */ +void mgmt__check_query_pool__init + (Mgmt__CheckQueryPool *message); +size_t mgmt__check_query_pool__get_packed_size + (const Mgmt__CheckQueryPool *message); +size_t mgmt__check_query_pool__pack + (const Mgmt__CheckQueryPool *message, + uint8_t *out); +size_t mgmt__check_query_pool__pack_to_buffer + (const Mgmt__CheckQueryPool *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryPool * + mgmt__check_query_pool__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_pool__free_unpacked + (Mgmt__CheckQueryPool 
*message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckQueryResp methods */ +void mgmt__check_query_resp__init + (Mgmt__CheckQueryResp *message); +size_t mgmt__check_query_resp__get_packed_size + (const Mgmt__CheckQueryResp *message); +size_t mgmt__check_query_resp__pack + (const Mgmt__CheckQueryResp *message, + uint8_t *out); +size_t mgmt__check_query_resp__pack_to_buffer + (const Mgmt__CheckQueryResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckQueryResp * + mgmt__check_query_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_query_resp__free_unpacked + (Mgmt__CheckQueryResp *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckSetPolicyReq methods */ +void mgmt__check_set_policy_req__init + (Mgmt__CheckSetPolicyReq *message); +size_t mgmt__check_set_policy_req__get_packed_size + (const Mgmt__CheckSetPolicyReq *message); +size_t mgmt__check_set_policy_req__pack + (const Mgmt__CheckSetPolicyReq *message, + uint8_t *out); +size_t mgmt__check_set_policy_req__pack_to_buffer + (const Mgmt__CheckSetPolicyReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckSetPolicyReq * + mgmt__check_set_policy_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_set_policy_req__free_unpacked + (Mgmt__CheckSetPolicyReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckPropReq methods */ +void mgmt__check_prop_req__init + (Mgmt__CheckPropReq *message); +size_t mgmt__check_prop_req__get_packed_size + (const Mgmt__CheckPropReq *message); +size_t mgmt__check_prop_req__pack + (const Mgmt__CheckPropReq *message, + uint8_t *out); +size_t mgmt__check_prop_req__pack_to_buffer + (const Mgmt__CheckPropReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckPropReq * + mgmt__check_prop_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_prop_req__free_unpacked + (Mgmt__CheckPropReq *message, + ProtobufCAllocator *allocator); 
+/* Mgmt__CheckPropResp methods */ +void mgmt__check_prop_resp__init + (Mgmt__CheckPropResp *message); +size_t mgmt__check_prop_resp__get_packed_size + (const Mgmt__CheckPropResp *message); +size_t mgmt__check_prop_resp__pack + (const Mgmt__CheckPropResp *message, + uint8_t *out); +size_t mgmt__check_prop_resp__pack_to_buffer + (const Mgmt__CheckPropResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckPropResp * + mgmt__check_prop_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_prop_resp__free_unpacked + (Mgmt__CheckPropResp *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckGetPolicyReq methods */ +void mgmt__check_get_policy_req__init + (Mgmt__CheckGetPolicyReq *message); +size_t mgmt__check_get_policy_req__get_packed_size + (const Mgmt__CheckGetPolicyReq *message); +size_t mgmt__check_get_policy_req__pack + (const Mgmt__CheckGetPolicyReq *message, + uint8_t *out); +size_t mgmt__check_get_policy_req__pack_to_buffer + (const Mgmt__CheckGetPolicyReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckGetPolicyReq * + mgmt__check_get_policy_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_get_policy_req__free_unpacked + (Mgmt__CheckGetPolicyReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckGetPolicyResp methods */ +void mgmt__check_get_policy_resp__init + (Mgmt__CheckGetPolicyResp *message); +size_t mgmt__check_get_policy_resp__get_packed_size + (const Mgmt__CheckGetPolicyResp *message); +size_t mgmt__check_get_policy_resp__pack + (const Mgmt__CheckGetPolicyResp *message, + uint8_t *out); +size_t mgmt__check_get_policy_resp__pack_to_buffer + (const Mgmt__CheckGetPolicyResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckGetPolicyResp * + mgmt__check_get_policy_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_get_policy_resp__free_unpacked + (Mgmt__CheckGetPolicyResp *message, + 
ProtobufCAllocator *allocator); +/* Mgmt__CheckActReq methods */ +void mgmt__check_act_req__init + (Mgmt__CheckActReq *message); +size_t mgmt__check_act_req__get_packed_size + (const Mgmt__CheckActReq *message); +size_t mgmt__check_act_req__pack + (const Mgmt__CheckActReq *message, + uint8_t *out); +size_t mgmt__check_act_req__pack_to_buffer + (const Mgmt__CheckActReq *message, + ProtobufCBuffer *buffer); +Mgmt__CheckActReq * + mgmt__check_act_req__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_act_req__free_unpacked + (Mgmt__CheckActReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__CheckActResp methods */ +void mgmt__check_act_resp__init + (Mgmt__CheckActResp *message); +size_t mgmt__check_act_resp__get_packed_size + (const Mgmt__CheckActResp *message); +size_t mgmt__check_act_resp__pack + (const Mgmt__CheckActResp *message, + uint8_t *out); +size_t mgmt__check_act_resp__pack_to_buffer + (const Mgmt__CheckActResp *message, + ProtobufCBuffer *buffer); +Mgmt__CheckActResp * + mgmt__check_act_resp__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__check_act_resp__free_unpacked + (Mgmt__CheckActResp *message, + ProtobufCAllocator *allocator); +/* --- per-message closures --- */ + +typedef void (*Mgmt__CheckInconsistPolicy_Closure) + (const Mgmt__CheckInconsistPolicy *message, + void *closure_data); +typedef void (*Mgmt__CheckEnableReq_Closure) + (const Mgmt__CheckEnableReq *message, + void *closure_data); +typedef void (*Mgmt__CheckDisableReq_Closure) + (const Mgmt__CheckDisableReq *message, + void *closure_data); +typedef void (*Mgmt__CheckStartReq_Closure) + (const Mgmt__CheckStartReq *message, + void *closure_data); +typedef void (*Mgmt__CheckStartResp_Closure) + (const Mgmt__CheckStartResp *message, + void *closure_data); +typedef void (*Mgmt__CheckStopReq_Closure) + (const Mgmt__CheckStopReq *message, + void *closure_data); +typedef void 
(*Mgmt__CheckStopResp_Closure) + (const Mgmt__CheckStopResp *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryReq_Closure) + (const Mgmt__CheckQueryReq *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryTime_Closure) + (const Mgmt__CheckQueryTime *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryInconsist_Closure) + (const Mgmt__CheckQueryInconsist *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryTarget_Closure) + (const Mgmt__CheckQueryTarget *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryPool_Closure) + (const Mgmt__CheckQueryPool *message, + void *closure_data); +typedef void (*Mgmt__CheckQueryResp_Closure) + (const Mgmt__CheckQueryResp *message, + void *closure_data); +typedef void (*Mgmt__CheckSetPolicyReq_Closure) + (const Mgmt__CheckSetPolicyReq *message, + void *closure_data); +typedef void (*Mgmt__CheckPropReq_Closure) + (const Mgmt__CheckPropReq *message, + void *closure_data); +typedef void (*Mgmt__CheckPropResp_Closure) + (const Mgmt__CheckPropResp *message, + void *closure_data); +typedef void (*Mgmt__CheckGetPolicyReq_Closure) + (const Mgmt__CheckGetPolicyReq *message, + void *closure_data); +typedef void (*Mgmt__CheckGetPolicyResp_Closure) + (const Mgmt__CheckGetPolicyResp *message, + void *closure_data); +typedef void (*Mgmt__CheckActReq_Closure) + (const Mgmt__CheckActReq *message, + void *closure_data); +typedef void (*Mgmt__CheckActResp_Closure) + (const Mgmt__CheckActResp *message, + void *closure_data); + +/* --- services --- */ + + +/* --- descriptors --- */ + +extern const ProtobufCMessageDescriptor mgmt__check_inconsist_policy__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_enable_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_disable_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_start_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_start_resp__descriptor; +extern const 
ProtobufCMessageDescriptor mgmt__check_stop_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_stop_resp__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_time__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_inconsist__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_target__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_pool__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_query_resp__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_set_policy_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_prop_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_prop_resp__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_get_policy_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_get_policy_resp__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_act_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__check_act_resp__descriptor; + +PROTOBUF_C__END_DECLS + + +#endif /* PROTOBUF_C_check_2eproto__INCLUDED */ diff --git a/src/mgmt/drpc_internal.h b/src/mgmt/drpc_internal.h index 4f612ddad33..e47f992e6a5 100644 --- a/src/mgmt/drpc_internal.h +++ b/src/mgmt/drpc_internal.h @@ -104,4 +104,19 @@ ds_mgmt_drpc_cont_set_owner(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); void ds_mgmt_drpc_group_update(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); +void +ds_mgmt_drpc_check_start(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); + +void +ds_mgmt_drpc_check_stop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); + +void +ds_mgmt_drpc_check_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); + +void +ds_mgmt_drpc_check_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); + +void +ds_mgmt_drpc_check_act(Drpc__Call *drpc_req, Drpc__Response *drpc_resp); + #endif /* 
__MGMT_DRPC_INTERNAL_H__ */ diff --git a/src/mgmt/rpc.c b/src/mgmt/rpc.c index 5be9912281a..db95722b8c9 100644 --- a/src/mgmt/rpc.c +++ b/src/mgmt/rpc.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2021 Intel Corporation. + * (C) Copyright 2016-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -44,6 +44,9 @@ CRT_RPC_DEFINE(mgmt_tgt_map_update, DAOS_ISEQ_MGMT_TGT_MAP_UPDATE, CRT_RPC_DEFINE(mgmt_get_bs_state, DAOS_ISEQ_MGMT_GET_BS_STATE, DAOS_OSEQ_MGMT_GET_BS_STATE) +CRT_RPC_DEFINE(mgmt_tgt_shard_destroy, DAOS_ISEQ_MGMT_TGT_SHARD_DESTROY, + DAOS_OSEQ_MGMT_TGT_SHARD_DESTROY) + /* Define for cont_rpcs[] array population below. * See MGMT_PROTO_*_RPC_LIST macro definition */ diff --git a/src/mgmt/rpc.h b/src/mgmt/rpc.h index 5ea43be32a1..cb832ec6455 100644 --- a/src/mgmt/rpc.h +++ b/src/mgmt/rpc.h @@ -18,7 +18,7 @@ * These are for daos_rpc::dr_opc and DAOS_RPC_OPCODE(opc, ...) rather than * crt_req_create(..., opc, ...). See daos_rpc.h. */ -#define DAOS_MGMT_VERSION 2 +#define DAOS_MGMT_VERSION 3 /* LIST of internal RPCS in form of: * OPCODE, flags, FMT, handler, corpc_hdlr, */ @@ -39,7 +39,9 @@ X(MGMT_TGT_PROFILE, 0, &CQF_mgmt_profile, ds_mgmt_tgt_profile_hdlr, NULL) \ X(MGMT_TGT_MAP_UPDATE, 0, &CQF_mgmt_tgt_map_update, ds_mgmt_hdlr_tgt_map_update, \ &ds_mgmt_hdlr_tgt_map_update_co_ops) \ - X(MGMT_TGT_MARK, 0, &CQF_mgmt_mark, ds_mgmt_tgt_mark_hdlr, NULL) + X(MGMT_TGT_MARK, 0, &CQF_mgmt_mark, ds_mgmt_tgt_mark_hdlr, NULL) \ + X(MGMT_TGT_SHARD_DESTROY, 0, &CQF_mgmt_tgt_shard_destroy, ds_mgmt_hdlr_tgt_shard_destroy, \ + NULL) /* Define for RPC enum population below */ #define X(a, ...) 
a, @@ -187,4 +189,16 @@ CRT_RPC_DECLARE(mgmt_mark, DAOS_ISEQ_MGMT_MARK, DAOS_OSEQ_MGMT_MARK) CRT_RPC_DECLARE(mgmt_get_bs_state, DAOS_ISEQ_MGMT_GET_BS_STATE, DAOS_OSEQ_MGMT_GET_BS_STATE) +#define DAOS_ISEQ_MGMT_TGT_SHARD_DESTROY /* input fields */ \ + ((uuid_t) (tsdi_pool_uuid) CRT_VAR) \ + ((int32_t) (tsdi_shard_idx) CRT_VAR) \ + ((uint32_t) (tsdi_padding) CRT_VAR) + +#define DAOS_OSEQ_MGMT_TGT_SHARD_DESTROY /* output fields */ \ + ((int32_t) (tsdo_rc) CRT_VAR) \ + ((uint32_t) (tsdo_padding) CRT_VAR) + +CRT_RPC_DECLARE(mgmt_tgt_shard_destroy, DAOS_ISEQ_MGMT_TGT_SHARD_DESTROY, + DAOS_OSEQ_MGMT_TGT_SHARD_DESTROY) + #endif /* __MGMT_RPC_H__ */ diff --git a/src/mgmt/srv.c b/src/mgmt/srv.c index 2293bfb0355..1c55a8230a5 100644 --- a/src/mgmt/srv.c +++ b/src/mgmt/srv.c @@ -150,6 +150,21 @@ process_drpc_request(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) case DRPC_METHOD_MGMT_LED_MANAGE: ds_mgmt_drpc_dev_manage_led(drpc_req, drpc_resp); break; + case DRPC_METHOD_MGMT_CHK_START: + ds_mgmt_drpc_check_start(drpc_req, drpc_resp); + break; + case DRPC_METHOD_MGMT_CHK_STOP: + ds_mgmt_drpc_check_stop(drpc_req, drpc_resp); + break; + case DRPC_METHOD_MGMT_CHK_QUERY: + ds_mgmt_drpc_check_query(drpc_req, drpc_resp); + break; + case DRPC_METHOD_MGMT_CHK_PROP: + ds_mgmt_drpc_check_prop(drpc_req, drpc_resp); + break; + case DRPC_METHOD_MGMT_CHK_ACT: + ds_mgmt_drpc_check_act(drpc_req, drpc_resp); + break; default: drpc_resp->status = DRPC__STATUS__UNKNOWN_METHOD; D_ERROR("Unknown method\n"); diff --git a/src/mgmt/srv_chk.c b/src/mgmt/srv_chk.c new file mode 100644 index 00000000000..a9c1ce1c3b0 --- /dev/null +++ b/src/mgmt/srv_chk.c @@ -0,0 +1,128 @@ +/** + * (C) Copyright 2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/* + * ds_mgmt: Check Methods + */ +#define D_LOGFAC DD_FAC(mgmt) + +#include +#include +#include + +#include "srv_internal.h" + +static int +ds_mgmt_chk_parse_uuid(int pool_nr, char **pools, uuid_t **p_uuids) +{ + uuid_t *uuids = NULL; + int rc = 0; + int i; + + if (pool_nr != 0) { + D_ALLOC_ARRAY(uuids, pool_nr); + if (uuids == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + for (i = 0; i < pool_nr; i++) { + rc = uuid_parse(pools[i], uuids[i]); + if (rc != 0) { + D_ERROR("Failed to parse pool %s: "DF_RC"\n", pools[i], DP_RC(rc)); + D_GOTO(out, rc); + } + } + } + +out: + if (rc != 0) + D_FREE(uuids); + else + *p_uuids = uuids; + + return rc; +} + +int +ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, + Mgmt__CheckInconsistPolicy **policies, int32_t pool_nr, char **pools, + uint32_t flags, int32_t phase) +{ + uuid_t *uuids = NULL; + struct chk_policy *ply = NULL; + int rc = 0; + int i; + + rc = ds_mgmt_chk_parse_uuid(pool_nr, pools, &uuids); + if (rc != 0) + goto out; + + if (policy_nr != 0) { + D_ALLOC_ARRAY(ply, policy_nr); + if (ply == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + for (i = 0; i < policy_nr; i++) { + ply[i].cp_class = policies[i]->inconsist_cas; + ply[i].cp_action = policies[i]->inconsist_act; + } + } + + rc = chk_leader_start(rank_nr, ranks, policy_nr, ply, pool_nr, uuids, flags, phase); + +out: + D_FREE(uuids); + D_FREE(ply); + + return rc; +} + +int +ds_mgmt_check_stop(int32_t pool_nr, char **pools) +{ + uuid_t *uuids = NULL; + int rc; + + rc = ds_mgmt_chk_parse_uuid(pool_nr, pools, &uuids); + if (rc == 0) { + rc = chk_leader_stop(pool_nr, uuids); + D_FREE(uuids); + } + + return rc; +} + +int +ds_mgmt_check_query(int32_t pool_nr, char **pools, chk_query_head_cb_t head_cb, + chk_query_pool_cb_t pool_cb, void *buf) +{ + uuid_t *uuids = NULL; + int rc; + + rc = ds_mgmt_chk_parse_uuid(pool_nr, pools, &uuids); + if (rc == 0) { + rc = chk_leader_query(pool_nr, uuids, head_cb, 
pool_cb, buf); + D_FREE(uuids); + } + + return rc; +} + +int +ds_mgmt_check_prop(chk_prop_cb_t prop_cb, void *buf) +{ + return chk_leader_prop(prop_cb, buf); +} + +int +ds_mgmt_check_act(uint64_t seq, uint32_t act, bool for_all) +{ + return chk_leader_act(seq, act, for_all); +} + +bool +ds_mgmt_check_enabled(void) +{ + return engine_in_check(); +} diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 620c8b954ee..a840aec93f2 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -20,6 +20,7 @@ #include "pool.pb-c.h" #include "cont.pb-c.h" #include "server.pb-c.h" +#include "check.pb-c.h" #include "srv_internal.h" #include "drpc_internal.h" @@ -2501,3 +2502,476 @@ ds_mgmt_drpc_cont_set_owner(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) mgmt__cont_set_owner_req__free_unpacked(req, &alloc.alloc); } + +/* + * NOTE: It is the control plane's duty to choose the check leader and generate the rank list. + * There are some requirements for the rank list: + * 1. There are no repeated ranks in the list. + * 2. Ranks should preferably be sorted, which greatly speeds up searching for a rank in the list.
+ */ +void +ds_mgmt_drpc_check_start(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Mgmt__CheckStartReq *req = NULL; + Mgmt__CheckStartResp resp = MGMT__CHECK_START_RESP__INIT; + uint8_t *body; + size_t len; + int rc = 0; + + if (!ds_mgmt_check_enabled()) { + D_ERROR("Not in check mode\n"); + drpc_resp->status = DRPC__STATUS__UNKNOWN_MODULE; + return; + } + + req = mgmt__check_start_req__unpack(&alloc.alloc, drpc_req->body.len, drpc_req->body.data); + if (alloc.oom || req == NULL) { + D_ERROR("Failed to unpack req (start check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_UNMARSHAL_PAYLOAD; + return; + } + + D_INFO("Received request to start check\n"); + + rc = ds_mgmt_check_start(req->n_ranks, req->ranks, req->n_policies, req->policies, + req->n_uuids, req->uuids, req->flags, -1 /* phase */); + if (rc < 0) + D_ERROR("Failed to start check: "DF_RC"\n", DP_RC(rc)); + + resp.status = rc; + len = mgmt__check_start_resp__get_packed_size(&resp); + D_ALLOC(body, len); + if (body == NULL) { + D_ERROR("Failed to allocate response body (start check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; + } else { + mgmt__check_start_resp__pack(&resp, body); + drpc_resp->body.len = len; + drpc_resp->body.data = body; + } + + mgmt__check_start_req__free_unpacked(req, &alloc.alloc); +} + +/* + * It is the control plane's duty to guarantee that if the check leader is still available, + * then the CHK_STOP dRPC needs to be sent to the check leader. 
+ */ +void +ds_mgmt_drpc_check_stop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Mgmt__CheckStopReq *req = NULL; + Mgmt__CheckStopResp resp = MGMT__CHECK_STOP_RESP__INIT; + uint8_t *body; + size_t len; + int rc = 0; + + if (!ds_mgmt_check_enabled()) { + D_ERROR("Not in check mode\n"); + drpc_resp->status = DRPC__STATUS__UNKNOWN_MODULE; + return; + } + + req = mgmt__check_stop_req__unpack(&alloc.alloc, drpc_req->body.len, drpc_req->body.data); + if (alloc.oom || req == NULL) { + D_ERROR("Failed to unpack req (stop check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_UNMARSHAL_PAYLOAD; + return; + } + + D_INFO("Received request to stop check\n"); + + rc = ds_mgmt_check_stop(req->n_uuids, req->uuids); + if (rc != 0) + D_ERROR("Failed to stop check: "DF_RC"\n", DP_RC(rc)); + + resp.status = rc; + len = mgmt__check_stop_resp__get_packed_size(&resp); + D_ALLOC(body, len); + if (body == NULL) { + D_ERROR("Failed to allocate response body (stop check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; + } else { + mgmt__check_stop_resp__pack(&resp, body); + drpc_resp->body.len = len; + drpc_resp->body.data = body; + } + + mgmt__check_stop_req__free_unpacked(req, &alloc.alloc); +} + +static void +ds_chk_query_free(Mgmt__CheckQueryResp *resp) +{ + Mgmt__CheckQueryPool *pool; + Mgmt__CheckQueryTarget *target; + int i; + int j; + + D_FREE(resp->inconsistency); + D_FREE(resp->time); + + if (resp->pools != NULL) { + for (i = 0; i < resp->n_pools; i++) { + pool = resp->pools[i]; + if (pool == NULL) + break; + + D_FREE(pool->uuid); + D_FREE(pool->inconsistency); + D_FREE(pool->time); + if (pool->targets != NULL) { + for (j = 0; j < pool->n_targets; j++) { + target = pool->targets[j]; + if (target == NULL) + break; + + D_FREE(target->inconsistency); + D_FREE(target->time); + + D_FREE(target); + } + + D_FREE(pool->targets); + } + + D_FREE(pool); + } + + D_FREE(resp->pools); + } +} + +static int 
+ds_chk_copy_inconsistency(Mgmt__CheckQueryInconsist **dst, struct chk_statistics *src) +{ + int rc = 0; + + D_ALLOC_PTR(*dst); + if (*dst == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + mgmt__check_query_inconsist__init(*dst); + (*dst)->total = src->cs_total; + (*dst)->repaired = src->cs_repaired; + (*dst)->ignored = src->cs_ignored; + (*dst)->failed = src->cs_failed; + +out: + return rc; +} + +static int +ds_chk_copy_time(Mgmt__CheckQueryTime **dst, struct chk_time *src) +{ + int rc = 0; + + D_ALLOC_PTR(*dst); + if (*dst == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + mgmt__check_query_time__init(*dst); + (*dst)->start_time = src->ct_start_time; + (*dst)->misc_time = src->ct_stop_time; + +out: + return rc; +} + +static int +ds_chk_query_head_cb(uint32_t ins_status, uint32_t ins_phase, struct chk_statistics *inconsistency, + struct chk_time *time, size_t n_pools, void *buf) +{ + Mgmt__CheckQueryResp *resp = buf; + int rc = 0; + + resp->ins_status = ins_status; + resp->ins_phase = ins_phase; + + rc = ds_chk_copy_inconsistency(&resp->inconsistency, inconsistency); + if (resp->inconsistency == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ds_chk_copy_time(&resp->time, time); + if (resp->time == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + D_ALLOC_ARRAY(resp->pools, n_pools); + if (resp->pools == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + resp->n_pools = n_pools; + +out: + if (rc != 0) + ds_chk_query_free(resp); + + return rc; +} + +static int +ds_chk_query_pool_cb(struct chk_query_pool_shard *shard, uint32_t idx, void *buf) +{ + Mgmt__CheckQueryResp *resp = buf; + Mgmt__CheckQueryPool *pool; + Mgmt__CheckQueryTarget *target; + int rc; + int i; + + D_ALLOC_PTR(pool); + if (pool == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + mgmt__check_query_pool__init(pool); + resp->pools[idx] = pool; + D_ASPRINTF(pool->uuid, DF_UUIDF, DP_UUID(shard->cqps_uuid)); + if (pool->uuid == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + pool->status = shard->cqps_status; + pool->phase = shard->cqps_phase; + 
rc = ds_chk_copy_inconsistency(&pool->inconsistency, &shard->cqps_statistics); + if (pool->inconsistency == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ds_chk_copy_time(&pool->time, &shard->cqps_time); + if (pool->time == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + D_ALLOC_ARRAY(pool->targets, shard->cqps_target_nr); + if (pool->targets == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + pool->n_targets = shard->cqps_target_nr; + for (i = 0; i < shard->cqps_target_nr; i++) { + D_ALLOC_PTR(target); + if (target == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + mgmt__check_query_target__init(target); + pool->targets[i] = target; + target->rank = shard->cqps_targets[i].cqt_rank; + target->target = shard->cqps_targets[i].cqt_tgt; + target->status = shard->cqps_targets[i].cqt_ins_status; + rc = ds_chk_copy_inconsistency(&target->inconsistency, + &shard->cqps_targets[i].cqt_statistics); + if (target->inconsistency == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ds_chk_copy_time(&target->time, &shard->cqps_targets[i].cqt_time); + if (target->time == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + +out: + if (rc != 0) + ds_chk_query_free(resp); + + return rc; +} + +/* + * NOTE: One pool may have M pool shards on M daos engines. Each of them has its own status and + * summary in the query result. They have the same UUID and are contiguous with each other. Control + * plane can decide how to show them to the admin based on the query option. + * + * Similarly, each pool shard may have N vos targets on the engine. Then there will be M * N + * vos targets for the whole pool. Each of them has its own check summary in the query result. + * It is the control plane's duty to re-organize related results before showing them to the admin. + * + * If some required pool is absent from the query result, it means that the pool does not exist. + * + * On the other hand, it is the control plane's duty to guarantee that if the check leader + * is still available, the CHK_QUERY dRPC needs to be sent to the check leader.
Otherwise, + * the query result may be inaccurate. + */ +void +ds_mgmt_drpc_check_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Mgmt__CheckQueryReq *req = NULL; + Mgmt__CheckQueryResp resp = MGMT__CHECK_QUERY_RESP__INIT; + uuid_t *pools = NULL; + uint8_t *body; + size_t len; + int rc = 0; + + if (!ds_mgmt_check_enabled()) { + D_ERROR("Not in check mode\n"); + drpc_resp->status = DRPC__STATUS__UNKNOWN_MODULE; + return; + } + + req = mgmt__check_query_req__unpack(&alloc.alloc, drpc_req->body.len, drpc_req->body.data); + if (alloc.oom || req == NULL) { + D_ERROR("Failed to unpack req (query check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_UNMARSHAL_PAYLOAD; + return; + } + + D_INFO("Received request to query check\n"); + + rc = ds_mgmt_check_query(req->n_uuids, req->uuids, ds_chk_query_head_cb, + ds_chk_query_pool_cb, &resp); + if (rc != 0) + D_ERROR("Failed to query check: "DF_RC"\n", DP_RC(rc)); + + resp.req_status = rc; + len = mgmt__check_query_resp__get_packed_size(&resp); + D_ALLOC(body, len); + if (body == NULL) { + D_ERROR("Failed to allocate response body (query check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; + } else { + mgmt__check_query_resp__pack(&resp, body); + drpc_resp->body.len = len; + drpc_resp->body.data = body; + } + + D_FREE(pools); + ds_chk_query_free(&resp); + mgmt__check_query_req__free_unpacked(req, &alloc.alloc); +} + +static void +ds_chk_prob_free(Mgmt__CheckInconsistPolicy **policies, uint32_t policy_nr) +{ + int i; + + if (policies != NULL) { + for (i = 0; i < policy_nr; i++) + D_FREE(policies[i]); + D_FREE(policies); + } +} + +#define ALL_CHK_POLICY CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN + +static int +ds_chk_prop_cb(void *buf, struct chk_policy *policies, int cnt, uint32_t flags) +{ + Mgmt__CheckInconsistPolicy **ply = NULL; + Mgmt__CheckPropResp *resp = buf; + int rc = 0; + int i = 0; + + D_ALLOC_ARRAY(ply, cnt); + if (ply == NULL) +
return -DER_NOMEM; + + for (i = 0; i < cnt; i++) { + D_ALLOC(ply[i], sizeof(*ply[i])); + if (ply[i] == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + mgmt__check_inconsist_policy__init(ply[i]); + if (policies[i].cp_class == 0 && cnt == ALL_CHK_POLICY) + ply[i]->inconsist_cas = i; + else + ply[i]->inconsist_cas = policies[i].cp_class; + ply[i]->inconsist_act = policies[i].cp_action; + } + + +out: + if (rc != 0) { + ds_chk_prob_free(ply, i); + } else { + resp->policies = ply; + resp->n_policies = cnt; + resp->flags = flags; + } + + return rc; +} + +void +ds_mgmt_drpc_check_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Mgmt__CheckPropReq *req = NULL; + Mgmt__CheckPropResp resp = MGMT__CHECK_PROP_RESP__INIT; + uint8_t *body; + size_t len; + int rc = 0; + + if (!ds_mgmt_check_enabled()) { + D_ERROR("Not in check mode\n"); + drpc_resp->status = DRPC__STATUS__UNKNOWN_MODULE; + return; + } + + req = mgmt__check_prop_req__unpack(&alloc.alloc, drpc_req->body.len, drpc_req->body.data); + if (alloc.oom || req == NULL) { + D_ERROR("Failed to unpack req (get property for check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_UNMARSHAL_PAYLOAD; + return; + } + + D_INFO("Received request to get property for check\n"); + + rc = ds_mgmt_check_prop(ds_chk_prop_cb, &resp); + if (rc != 0) + D_ERROR("Failed to set get property for check: "DF_RC"\n", DP_RC(rc)); + + resp.status = rc; + len = mgmt__check_prop_resp__get_packed_size(&resp); + D_ALLOC(body, len); + if (body == NULL) { + D_ERROR("Failed to allocate response body (get property for check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; + } else { + mgmt__check_prop_resp__pack(&resp, body); + drpc_resp->body.len = len; + drpc_resp->body.data = body; + } + + ds_chk_prob_free(resp.policies, resp.n_policies); + mgmt__check_prop_req__free_unpacked(req, &alloc.alloc); +} + +void +ds_mgmt_drpc_check_act(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) +{ + 
struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Mgmt__CheckActReq *req = NULL; + Mgmt__CheckActResp resp = MGMT__CHECK_ACT_RESP__INIT; + uint8_t *body; + size_t len; + int rc = 0; + + if (!ds_mgmt_check_enabled()) { + D_ERROR("Not in check mode\n"); + drpc_resp->status = DRPC__STATUS__UNKNOWN_MODULE; + return; + } + + req = mgmt__check_act_req__unpack(&alloc.alloc, drpc_req->body.len, drpc_req->body.data); + if (alloc.oom || req == NULL) { + D_ERROR("Failed to unpack req (set action for check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_UNMARSHAL_PAYLOAD; + return; + } + + D_INFO("Received request to set action for check\n"); + + rc = ds_mgmt_check_act(req->seq, req->act, req->for_all); + if (rc != 0) + D_ERROR("Failed to set action for check: "DF_RC"\n", DP_RC(rc)); + + resp.status = rc; + len = mgmt__check_act_resp__get_packed_size(&resp); + D_ALLOC(body, len); + if (body == NULL) { + D_ERROR("Failed to allocate response body (set action for check)\n"); + drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; + } else { + mgmt__check_act_resp__pack(&resp, body); + drpc_resp->body.len = len; + drpc_resp->body.data = body; + } + + mgmt__check_act_req__free_unpacked(req, &alloc.alloc); +} diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index f9ca3182705..9e0d2a5691f 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -21,9 +21,11 @@ #include #include #include +#include #include #include +#include "check.pb-c.h" #include "svc.pb-c.h" #include "smd.pb-c.h" #include "rpc.h" @@ -107,6 +109,17 @@ int ds_mgmt_cont_set_owner(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uuid_t cont_uuid, const char *user, const char *group); +/** srv_chk.c */ +int ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, + Mgmt__CheckInconsistPolicy **policies, int pool_nr, char **pools, + uint32_t flags, int phase); +int ds_mgmt_check_stop(int pool_nr, char **pools); +int ds_mgmt_check_query(int pool_nr, char **pools, 
chk_query_head_cb_t head_cb, + chk_query_pool_cb_t pool_cb, void *buf); +int ds_mgmt_check_prop(chk_prop_cb_t prop_cb, void *buf); +int ds_mgmt_check_act(uint64_t seq, uint32_t act, bool for_all); +bool ds_mgmt_check_enabled(void); + /** srv_query.c */ /* Device health stats from nvme_stats */ @@ -133,6 +146,7 @@ int ds_mgmt_tgt_setup(void); void ds_mgmt_tgt_cleanup(void); void ds_mgmt_hdlr_tgt_create(crt_rpc_t *rpc_req); void ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *rpc_req); +void ds_mgmt_hdlr_tgt_shard_destroy(crt_rpc_t *rpc_req); int ds_mgmt_tgt_create_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv); int ds_mgmt_tgt_create_post_reply(crt_rpc_t *rpc, void *priv); diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index 5f99c9ef406..b4d1b9a28b7 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -17,7 +17,7 @@ #include "srv_internal.h" /** Destroy the pool on the specified ranks. */ -static int +int ds_mgmt_tgt_pool_destroy_ranks(uuid_t pool_uuid, d_rank_list_t *filter_ranks) { crt_rpc_t *td_req; @@ -649,3 +649,51 @@ ds_mgmt_pool_get_prop(uuid_t pool_uuid, d_rank_list_t *svc_ranks, out: return rc; } + +/** + * Destroy the specified pool shard on the specified storage rank + */ +int +ds_mgmt_tgt_pool_shard_destroy(uuid_t pool_uuid, int shard_idx, d_rank_t rank) +{ + crt_rpc_t *req = NULL; + struct mgmt_tgt_shard_destroy_in *tsdi; + struct mgmt_tgt_shard_destroy_out *tsdo; + crt_endpoint_t tgt_ep; + crt_opcode_t opc; + int rc; + + tgt_ep.ep_grp = NULL; + tgt_ep.ep_rank = rank; + tgt_ep.ep_tag = daos_rpc_tag(DAOS_REQ_MGMT, 0); + + opc = DAOS_RPC_OPCODE(MGMT_TGT_SHARD_DESTROY, DAOS_MGMT_MODULE, + DAOS_MGMT_VERSION); + + rc = crt_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep, opc, &req); + if (rc != 0) + goto out; + + tsdi = crt_req_get(req); + D_ASSERT(tsdi != NULL); + + uuid_copy(tsdi->tsdi_pool_uuid, pool_uuid); + tsdi->tsdi_shard_idx = shard_idx; + + rc = dss_rpc_send(req); + if (rc != 0) + goto out; + + tsdo = crt_reply_get(req); + rc = 
tsdo->tsdo_rc; + +out: + if (req != NULL) + crt_req_decref(req); + + if (rc != 0) + D_ERROR("Failed to destroy pool "DF_UUIDF" shard %u on rank %u: "DF_RC"\n", + DP_UUID(pool_uuid), shard_idx, rank, DP_RC(rc)); + + return rc; +} diff --git a/src/mgmt/srv_system.c b/src/mgmt/srv_system.c index 62c4bca4c94..38e4f0e702c 100644 --- a/src/mgmt/srv_system.c +++ b/src/mgmt/srv_system.c @@ -260,7 +260,7 @@ map_update_bcast(crt_context_t ctx, struct mgmt_svc *svc, uint32_t map_version, } static int -mgmt_svc_map_dist_cb(struct ds_rsvc *rsvc) +mgmt_svc_map_dist_cb(struct ds_rsvc *rsvc, uint32_t *version) { struct mgmt_svc *svc = mgmt_svc_obj(rsvc); struct dss_module_info *info = dss_get_module_info(); @@ -283,10 +283,12 @@ mgmt_svc_map_dist_cb(struct ds_rsvc *rsvc) rc = map_update_bcast(info->dmi_ctx, svc, map_version, n_map_servers, map_servers); - free_server_list(map_servers, n_map_servers); + if (rc != 0) + return rc; - return rc; + *version = map_version; + return 0; } static struct ds_rsvc_class mgmt_svc_rsvc_class = { diff --git a/src/mgmt/srv_target.c b/src/mgmt/srv_target.c index 88027388965..43ed7f9dbc1 100644 --- a/src/mgmt/srv_target.c +++ b/src/mgmt/srv_target.c @@ -252,18 +252,86 @@ ds_mgmt_tgt_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg) return common_pool_iterate(dss_storage_path, cb, arg); } -static int -newborn_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg) +int +ds_mgmt_newborn_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg) { return common_pool_iterate(newborns_path, cb, arg); } -static int -zombie_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg) +int +ds_mgmt_zombie_pool_iterate(int (*cb)(uuid_t uuid, void *arg), void *arg) { return common_pool_iterate(zombies_path, cb, arg); } +static int +ds_mgmt_pool_exist_internal(uuid_t uuid, const char *dir, const char *fname, int *idx, char **out) +{ + char *path = NULL; + int rc; + + rc = path_gen(uuid, dss_storage_path, fname, idx, &path); + if (rc != 0) + goto out; 
+ + rc = access(path, F_OK); + if (rc >= 0) + D_GOTO(out, rc = 1); + + if (errno == ENOENT) + D_GOTO(out, rc = 0); + + D_ERROR("Failed to check existence for "DF_UUID" with name %s, idx %d: "DF_RC"\n", + DP_UUID(uuid), fname != NULL ? fname : "", idx != NULL ? *idx : -1, + DP_RC(rc)); + +out: + if (rc > 0 && out != NULL) + *out = path; + else + D_FREE(path); + return rc; +} + +int +ds_mgmt_pool_exist(uuid_t uuid) +{ + int rc; + + rc = ds_mgmt_pool_exist_internal(uuid, dss_storage_path, NULL, NULL, NULL); + if (rc != 0) + goto out; + + rc = ds_mgmt_pool_exist_internal(uuid, newborns_path, NULL, NULL, NULL); + if (rc != 0) + goto out; + + rc = ds_mgmt_pool_exist_internal(uuid, zombies_path, NULL, NULL, NULL); + +out: + return rc; +} + +int +ds_mgmt_tgt_pool_exist(uuid_t uuid, char **path) +{ + int tid = dss_get_module_info()->dmi_tgt_id; + int rc; + + rc = ds_mgmt_pool_exist_internal(uuid, dss_storage_path, VOS_FILE, &tid, path); + if (rc != 0) + goto out; + + rc = ds_mgmt_pool_exist_internal(uuid, newborns_path, VOS_FILE, &tid, path); + if (rc != 0) + goto out; + + rc = ds_mgmt_pool_exist_internal(uuid, zombies_path, VOS_FILE, &tid, path); + +out: + return rc; +} + struct dead_pool { d_list_t dp_link; uuid_t dp_uuid; @@ -350,7 +418,7 @@ cleanup_leftover_pools(bool zombie_only) D_INIT_LIST_HEAD(&dead_list); - rc = zombie_pool_iterate(cleanup_leftover_cb, &dead_list); + rc = ds_mgmt_zombie_pool_iterate(cleanup_leftover_cb, &dead_list); if (rc) D_ERROR("failed to delete SPDK blobs for ZOMBIES pools: " "%d, will try again\n", rc); @@ -359,7 +427,7 @@ cleanup_leftover_pools(bool zombie_only) if (zombie_only) return; - rc = newborn_pool_iterate(cleanup_leftover_cb, &dead_list); + rc = ds_mgmt_newborn_pool_iterate(cleanup_leftover_cb, &dead_list); if (rc) D_ERROR("failed to delete SPDK blobs for NEWBORNS pools: " "%d, will try again\n", rc); @@ -1494,3 +1562,43 @@ ds_mgmt_tgt_map_update_aggregator(crt_rpc_t *source, crt_rpc_t *result, out_result->tm_rc += 
out_source->tm_rc; return 0; } + +/** + * RPC handler for pool shard destroy + */ +void +ds_mgmt_hdlr_tgt_shard_destroy(crt_rpc_t *req) +{ + struct mgmt_tgt_shard_destroy_in *tsdi; + struct mgmt_tgt_shard_destroy_out *tsdo; + char *path = NULL; + int rc = 0; + + tsdi = crt_req_get(req); + tsdo = crt_reply_get(req); + + /* + * The being destroyed one must be down or downout, or not in the pool map. + * It is the RPC sponsor (PS leader)'s duty to guarantee that. Need not to + * stop the pool service. + */ + + rc = ds_mgmt_tgt_file(tsdi->tsdi_pool_uuid, VOS_FILE, &tsdi->tsdi_shard_idx, &path); + if (rc == 0) { + rc = unlink(path); + if (rc < 0) { + if (errno == ENOENT) + rc = 0; + else + rc = daos_errno2der(errno); + } + + D_FREE(path); + } + + D_DEBUG(DB_MGMT, "Processed rpc %p to destroy pool "DF_UUIDF" shard %u: "DF_RC"\n", + req, DP_UUID(tsdi->tsdi_pool_uuid), tsdi->tsdi_shard_idx, DP_RC(rc)); + + tsdo->tsdo_rc = rc; + crt_reply_send(req); +} diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index 4297f1fd914..fdacbe3831b 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -840,7 +840,7 @@ const ProtobufCMessageDescriptor mgmt__group_update_resp__descriptor = (ProtobufCMessageInit) mgmt__group_update_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[11] = +static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[12] = { { "sys", @@ -974,9 +974,22 @@ static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[11] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "check_mode", + 12, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__JoinReq, check_mode), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__join_req__field_indices_by_name[] = { 5, /* field[5] = addr */ + 11, /* field[11] = check_mode */ 7, /* field[7] = idx */ 8, /* 
field[8] = incarnation */ 4, /* field[4] = nctxs */ @@ -991,7 +1004,7 @@ static const unsigned mgmt__join_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__join_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 11 } + { 0, 12 } }; const ProtobufCMessageDescriptor mgmt__join_req__descriptor = { @@ -1001,23 +1014,25 @@ const ProtobufCMessageDescriptor mgmt__join_req__descriptor = "Mgmt__JoinReq", "mgmt", sizeof(Mgmt__JoinReq), - 11, + 12, mgmt__join_req__field_descriptors, mgmt__join_req__field_indices_by_name, 1, mgmt__join_req__number_ranges, (ProtobufCMessageInit) mgmt__join_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue mgmt__join_resp__state__enum_values_by_number[2] = +static const ProtobufCEnumValue mgmt__join_resp__state__enum_values_by_number[3] = { { "IN", "MGMT__JOIN_RESP__STATE__IN", 0 }, { "OUT", "MGMT__JOIN_RESP__STATE__OUT", 1 }, + { "CHECK", "MGMT__JOIN_RESP__STATE__CHECK", 2 }, }; static const ProtobufCIntRange mgmt__join_resp__state__value_ranges[] = { -{0, 0},{0, 2} +{0, 0},{0, 3} }; -static const ProtobufCEnumValueIndex mgmt__join_resp__state__enum_values_by_name[2] = +static const ProtobufCEnumValueIndex mgmt__join_resp__state__enum_values_by_name[3] = { + { "CHECK", 2 }, { "IN", 0 }, { "OUT", 1 }, }; @@ -1028,9 +1043,9 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = "State", "Mgmt__JoinResp__State", "mgmt", - 2, + 3, mgmt__join_resp__state__enum_values_by_number, - 2, + 3, mgmt__join_resp__state__enum_values_by_name, 1, mgmt__join_resp__state__value_ranges, diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 4e22f7f13f7..10a8da4ca77 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -43,7 +43,11 @@ typedef enum _Mgmt__JoinResp__State { /* * Server excluded from the system. */ - MGMT__JOIN_RESP__STATE__OUT = 1 + MGMT__JOIN_RESP__STATE__OUT = 1, + /* + * Server should start in checker mode. 
+ */ + MGMT__JOIN_RESP__STATE__CHECK = 2 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(MGMT__JOIN_RESP__STATE) } Mgmt__JoinResp__State; @@ -151,10 +155,14 @@ struct _Mgmt__JoinReq */ size_t n_secondary_nctxs; uint32_t *secondary_nctxs; + /* + * rank started in check mode + */ + protobuf_c_boolean check_mode; }; #define MGMT__JOIN_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0,NULL, 0,NULL, 0 } struct _Mgmt__JoinResp diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 80f95891c8d..3b62d64d6a5 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -612,3 +612,42 @@ mock_ds_mgmt_dev_set_faulty_setup(void) ds_mgmt_dev_set_faulty_return = 0; uuid_clear(ds_mgmt_dev_set_faulty_uuid); } + +int +ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, + Mgmt__CheckInconsistPolicy **policies, int pool_nr, char **pools, + uint32_t flags, int phase) +{ + return 0; +} + +int +ds_mgmt_check_stop(int pool_nr, char **pools) +{ + return 0; +} + +int +ds_mgmt_check_query(int pool_nr, char **pools, chk_query_head_cb_t head_cb, + chk_query_pool_cb_t pool_cb, void *buf) +{ + return 0; +} + +int +ds_mgmt_check_prop(chk_prop_cb_t prop_cb, void *buf) +{ + return 0; +} + +int +ds_mgmt_check_act(uint64_t seq, uint32_t act, bool for_all) +{ + return 0; +} + +bool +ds_mgmt_check_enabled(void) +{ + return true; +} diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index a9081af8d2d..4d577880201 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -21,6 +21,7 @@ #include "../acl.pb-c.h" #include 
"../pool.pb-c.h" #include "../cont.pb-c.h" +#include "../check.pb-c.h" #include "../svc.pb-c.h" #include "../server.pb-c.h" #include "../srv_internal.h" @@ -118,7 +119,13 @@ test_mgmt_drpc_handlers_bad_call_payload(void **state) expect_failure_for_bad_call_payload(ds_mgmt_drpc_dev_replace); expect_failure_for_bad_call_payload(ds_mgmt_drpc_pool_list_cont); expect_failure_for_bad_call_payload(ds_mgmt_drpc_cont_set_owner); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_pool_upgrade); expect_failure_for_bad_call_payload(ds_mgmt_drpc_group_update); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_check_start); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_check_stop); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_check_query); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_check_prop); + expect_failure_for_bad_call_payload(ds_mgmt_drpc_check_act); } static daos_prop_t * @@ -2624,7 +2631,7 @@ test_drpc_pool_upgrade_success(void **state) D_FREE(resp.body.data); } -/* +/*/ * LED manage test setup */ static int @@ -2930,6 +2937,51 @@ test_drpc_dev_set_faulty_success(void **state) D_FREE(resp.body.data); } +/* + * dRPC check start tests + */ + +static void +test_drpc_check_start_success(void **state) +{ +} + +/* + * dRPC check stop tests + */ + +static void +test_drpc_check_stop_success(void **state) +{ +} + +/* + * dRPC check query tests + */ + +static void +test_drpc_check_query_success(void **state) +{ +} + +/* + * dRPC check prop tests + */ + +static void +test_drpc_check_prop_success(void **state) +{ +} + +/* + * dRPC check act tests + */ + +static void +test_drpc_check_act_success(void **state) +{ +} + #define ACL_TEST(x) cmocka_unit_test_setup_teardown(x, \ drpc_pool_acl_setup, \ drpc_pool_acl_teardown) @@ -2988,6 +3040,16 @@ test_drpc_dev_set_faulty_success(void **state) #define SET_FAULTY_TEST(x) cmocka_unit_test_setup(x, drpc_dev_set_faulty_setup) +#define CHECK_START_TEST(x) cmocka_unit_test(x) + +#define CHECK_STOP_TEST(x) cmocka_unit_test(x) 
+ +#define CHECK_QUERY_TEST(x) cmocka_unit_test(x) + +#define CHECK_PROP_TEST(x) cmocka_unit_test(x) + +#define CHECK_ACT_TEST(x) cmocka_unit_test(x) + int main(void) @@ -3064,6 +3126,11 @@ main(void) SET_FAULTY_TEST(test_drpc_dev_set_faulty_bad_uuid), SET_FAULTY_TEST(test_drpc_dev_set_faulty_fails), SET_FAULTY_TEST(test_drpc_dev_set_faulty_success), + CHECK_START_TEST(test_drpc_check_start_success), + CHECK_STOP_TEST(test_drpc_check_stop_success), + CHECK_QUERY_TEST(test_drpc_check_query_success), + CHECK_PROP_TEST(test_drpc_check_prop_success), + CHECK_ACT_TEST(test_drpc_check_act_success), }; return cmocka_run_group_tests_name("mgmt_srv_drpc", tests, NULL, NULL); diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 35bf2abcfd1..77e727e7cb9 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -472,7 +472,8 @@ migrate_pool_tls_create_one(void *data) struct migrate_pool_tls_create_arg *arg = data; struct obj_tls *tls = obj_tls_get(); struct migrate_pool_tls *pool_tls; - int rc; + struct ds_pool_child *pool_child = NULL; + int rc = 0; pool_tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation); if (pool_tls != NULL) { @@ -483,6 +484,16 @@ migrate_pool_tls_create_one(void *data) return 0; } + pool_child = ds_pool_child_lookup(arg->pool_uuid); + if (pool_child == NULL) { + D_ASSERTF(dss_get_module_info()->dmi_xs_id == 0, + "Cannot find the pool "DF_UUIDF"\n", DP_UUID(arg->pool_uuid)); + } else if (unlikely(pool_child->spc_no_storage)) { + D_DEBUG(DB_REBUILD, DF_UUID" "DF_UUID" lost pool shard, ver %d, skip.\n", + DP_UUID(arg->pool_uuid), DP_UUID(arg->pool_hdl_uuid), arg->version); + D_GOTO(out, rc = 0); + } + D_ALLOC_PTR(pool_tls); if (pool_tls == NULL) D_GOTO(out, rc = -DER_NOMEM); @@ -557,6 +568,9 @@ migrate_pool_tls_create_one(void *data) if (rc && pool_tls) migrate_pool_tls_destroy(pool_tls); + if (pool_child != NULL) + ds_pool_child_put(pool_child); + return rc; } diff --git 
a/src/pool/SConscript b/src/pool/SConscript index 035142df377..d1535dd0cc2 100644 --- a/src/pool/SConscript +++ b/src/pool/SConscript @@ -27,7 +27,8 @@ def scons(): 'srv_target.c', 'srv_util.c', 'srv_iv.c', 'srv_cli.c', 'srv_pool_scrub_ult.c', 'srv_pool_map.c', - 'srv_metrics.c', 'srv_pool_chkpt.c', common], + 'srv_metrics.c', 'srv_pool_chkpt.c', + 'srv_pool_check.c', common], install_off="../..") senv.Install('$PREFIX/lib64/daos_srv', ds_pool) diff --git a/src/pool/srv.c b/src/pool/srv.c index c164ddef3a8..086fcae7d2d 100644 --- a/src/pool/srv.c +++ b/src/pool/srv.c @@ -77,9 +77,12 @@ setup(void) { bool start = true; - d_getenv_bool("DAOS_START_POOL_SVC", &start); - if (start) - return ds_pool_start_all(); + if (!engine_in_check()) { + d_getenv_bool("DAOS_START_POOL_SVC", &start); + if (start) + return ds_pool_start_all(); + } + return 0; } diff --git a/src/pool/srv_internal.h b/src/pool/srv_internal.h index f806f7442c7..dff5cb74898 100644 --- a/src/pool/srv_internal.h +++ b/src/pool/srv_internal.h @@ -146,6 +146,9 @@ struct pool_map_refresh_ult_arg { void ds_pool_rsvc_class_register(void); void ds_pool_rsvc_class_unregister(void); uint32_t ds_pool_get_vos_pool_df_version(uint32_t pool_global_version); +char *ds_pool_svc_rdb_path(const uuid_t pool_uuid); +int ds_pool_svc_load(struct rdb_tx *tx, uuid_t uuid, rdb_path_t *root, uint32_t *global_version_out, + struct pool_buf **map_buf_out, uint32_t *map_version_out); int ds_pool_start_all(void); int ds_pool_stop_all(void); int ds_pool_hdl_is_from_srv(struct ds_pool *pool, uuid_t hdl); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index d6bf72d9e9e..adff5b7f7a2 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -182,7 +182,6 @@ struct pool_svc { bool ps_force_notify; /* MS of PS membership */ struct pool_svc_sched ps_reconf_sched; struct pool_svc_sched ps_rfcheck_sched; /* Check all containers RF for the pool */ - uint32_t ps_global_map_version; /* global pool map version on all targets */ uint32_t 
ps_ops_enabled; /* cached ds_pool_prop_svc_ops_enabled */ uint32_t ps_ops_max; /* cached ds_pool_prop_svc_ops_max */ uint32_t ps_ops_age; /* cached ds_pool_prop_svc_ops_age */ @@ -211,6 +210,18 @@ static int find_hdls_to_evict(struct rdb_tx *tx, struct pool_svc *svc, uuid_t **hdl_uuids, size_t *hdl_uuids_size, int *n_hdl_uuids, char *machine); +static inline struct pool_svc * +pool_ds2svc(struct ds_pool_svc *ds_svc) +{ + return (struct pool_svc *)ds_svc; +} + +static inline struct ds_pool_svc * +pool_svc2ds(struct pool_svc *svc) +{ + return (struct ds_pool_svc *)svc; +} + static struct pool_svc * pool_svc_obj(struct ds_rsvc *rsvc) { @@ -322,8 +333,8 @@ pool_svc_rdb_path_common(const uuid_t pool_uuid, const char *suffix) } /* Return a pool service RDB path. */ -static char * -pool_svc_rdb_path(const uuid_t pool_uuid) +char * +ds_pool_svc_rdb_path(const uuid_t pool_uuid) { return pool_svc_rdb_path_common(pool_uuid, ""); } @@ -983,7 +994,7 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group, d_iov_set(&psid, (void *)pool_uuid, sizeof(uuid_t)); rc = ds_rsvc_dist_start(DS_RSVC_CLASS_POOL, &psid, pool_uuid, ranks, RDB_NIL_TERM, - true /* create */, true /* bootstrap */, ds_rsvc_get_md_cap()); + DS_RSVC_CREATE, true /* bootstrap */, ds_rsvc_get_md_cap()); if (rc != 0) D_GOTO(out_ranks, rc); @@ -1103,7 +1114,7 @@ pool_svc_locate_cb(d_iov_t *id, char **path) if (id->iov_len != sizeof(uuid_t)) return -DER_INVAL; - s = pool_svc_rdb_path(id->iov_buf); + s = ds_pool_svc_rdb_path(id->iov_buf); if (s == NULL) return -DER_NOMEM; *path = s; @@ -1404,11 +1415,13 @@ init_events(struct pool_svc *svc) D_ASSERT(events->pse_handler == ABT_THREAD_NULL); D_ASSERT(events->pse_stop == false); - rc = crt_register_event_cb(ds_pool_crt_event_cb, svc); - if (rc != 0) { - D_ERROR(DF_UUID": failed to register event callback: "DF_RC"\n", - DP_UUID(svc->ps_uuid), DP_RC(rc)); - goto err; + if (!engine_in_check()) { + rc = crt_register_event_cb(ds_pool_crt_event_cb, 
svc); + if (rc != 0) { + D_ERROR(DF_UUID": failed to register event callback: "DF_RC"\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + goto err; + } } /* @@ -1433,7 +1446,8 @@ init_events(struct pool_svc *svc) return 0; err_cb: - crt_unregister_event_cb(ds_pool_crt_event_cb, svc); + if (!engine_in_check()) + crt_unregister_event_cb(ds_pool_crt_event_cb, svc); discard_events(&events->pse_queue); err: return rc; @@ -1447,7 +1461,8 @@ fini_events(struct pool_svc *svc) D_ASSERT(events->pse_handler != ABT_THREAD_NULL); - crt_unregister_event_cb(ds_pool_crt_event_cb, svc); + if (!engine_in_check()) + crt_unregister_event_cb(ds_pool_crt_event_cb, svc); ABT_mutex_lock(events->pse_mutex); events->pse_stop = true; @@ -1530,45 +1545,43 @@ primary_group_initialized(void) } /* - * Read the DB for map_buf, map_version, and prop. Callers are responsible for - * freeing *map_buf and *prop. + * Check the layout versions and read the pool map. If the DB is empty, return + * positive error number DER_UNINIT. If the return value is 0, the caller is + * responsible for freeing *map_buf_out with D_FREE eventually. */ -static int -read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, - uint32_t *map_version, daos_prop_t **prop) +int +ds_pool_svc_load(struct rdb_tx *tx, uuid_t uuid, rdb_path_t *root, uint32_t *global_version_out, + struct pool_buf **map_buf_out, uint32_t *map_version_out) { - struct rdb_tx tx; + uuid_t uuid_tmp; d_iov_t value; + uint32_t global_version; + struct pool_buf *map_buf; + uint32_t map_version; bool version_exists = false; - bool rdb_size_ok = false; - uint32_t svc_ops_enabled = 0; - uint32_t svc_ops_max = 0; - uint32_t svc_ops_age = 0; - uint64_t rdb_size; - struct daos_prop_entry *svc_rf_entry; int rc; - rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); - if (rc != 0) - goto out; - ABT_rwlock_rdlock(svc->ps_lock); + /* + * For the ds_notify_ras_eventf calls below, use a copy to avoid + * casting the uuid pointer. 
+ */ + uuid_copy(uuid_tmp, uuid); /* Check the layout version. */ - d_iov_set(&value, &svc->ps_global_version, sizeof(svc->ps_global_version)); - rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_global_version, &value); + d_iov_set(&value, &global_version, sizeof(global_version)); + rc = rdb_tx_lookup(tx, root, &ds_pool_prop_global_version, &value); if (rc == -DER_NONEXIST) { /* * This DB may be new or incompatible. Check the existence of * the pool map to find out which is the case. (See the * references to version_exists below.) */ - D_DEBUG(DB_MD, DF_UUID": no layout version\n", - DP_UUID(svc->ps_uuid)); + D_DEBUG(DB_MD, DF_UUID": no layout version\n", DP_UUID(uuid)); goto check_map; } else if (rc != 0) { D_ERROR(DF_UUID": failed to look up layout version: "DF_RC"\n", - DP_UUID(svc->ps_uuid), DP_RC(rc)); - goto out_lock; + DP_UUID(uuid), DP_RC(rc)); + goto out; } version_exists = true; @@ -1576,23 +1589,23 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, * downgrading the DAOS software of an upgraded pool report * a proper RAS error. 
*/ - if (svc->ps_global_version > DAOS_POOL_GLOBAL_VERSION) { + if (global_version > DAOS_POOL_GLOBAL_VERSION) { ds_notify_ras_eventf(RAS_POOL_DF_INCOMPAT, RAS_TYPE_INFO, RAS_SEV_ERROR, NULL /* hwid */, NULL /* rank */, NULL /* inc */, NULL /* jobid */, - &svc->ps_uuid, NULL /* cont */, + &uuid_tmp, NULL /* cont */, NULL /* objid */, NULL /* ctlop */, NULL /* data */, "incompatible layout version: %u larger than " - "%u", svc->ps_global_version, + "%u", global_version, DAOS_POOL_GLOBAL_VERSION); rc = -DER_DF_INCOMPT; - goto out_lock; + goto out; } check_map: - rc = read_map_buf(&tx, &svc->ps_root, map_buf, map_version); + rc = read_map_buf(tx, root, &map_buf, &map_version); if (rc != 0) { if (rc == -DER_NONEXIST && !version_exists) { /* @@ -1600,28 +1613,67 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, * exists, then the pool map must also exist; * otherwise, it is an error. */ - D_DEBUG(DB_MD, DF_UUID": new db\n", - DP_UUID(svc->ps_uuid)); - rc = + DER_UNINIT; + D_DEBUG(DB_MD, DF_UUID": new db\n", DP_UUID(uuid)); + rc = DER_UNINIT; /* positive error number */ } else { - D_ERROR(DF_UUID": failed to read pool map buffer: "DF_RC - "\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); + D_ERROR(DF_UUID": failed to read pool map buffer: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); } - goto out_lock; + goto out; } if (!version_exists) /* This could also be a 1.x pool, which we assume nobody cares. */ - D_DEBUG(DB_MD, DF_UUID": assuming 2.0\n", DP_UUID(svc->ps_uuid)); + D_DEBUG(DB_MD, DF_UUID": assuming 2.0\n", DP_UUID(uuid)); - rc = pool_prop_read(&tx, svc, DAOS_PO_QUERY_PROP_ALL, prop); - if (rc != 0) { - D_ERROR(DF_UUID": cannot get properties: "DF_RC"\n", DP_UUID(svc->ps_uuid), - DP_RC(rc)); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + *global_version_out = global_version; + *map_buf_out = map_buf; + *map_version_out = map_version; +out: + return rc; +} + +/* + * Read the DB for map_buf, map_version, and prop. 
If the return value is 0, + * the caller is responsible for freeing *map_buf_out and *prop_out eventually. + */ +static int +read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf_out, + uint32_t *map_version_out, daos_prop_t **prop_out) +{ + struct rdb_tx tx; + d_iov_t value; + struct pool_buf *map_buf; + struct daos_prop_entry *svc_rf_entry; + daos_prop_t *prop = NULL; + uint32_t svc_ops_enabled = 0; + uint32_t svc_ops_max = 0; + uint32_t svc_ops_age = 0; + uint32_t map_version; + uint64_t rdb_size; + bool rdb_size_ok = false; + int rc; + + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc != 0) + goto out; + ABT_rwlock_rdlock(svc->ps_lock); + + rc = ds_pool_svc_load(&tx, svc->ps_uuid, &svc->ps_root, &svc->ps_global_version, &map_buf, + &map_version); + if (rc != 0) goto out_lock; + + rc = pool_prop_read(&tx, svc, DAOS_PO_QUERY_PROP_ALL, &prop); + if (rc != 0) { + D_ERROR(DF_UUID": failed to read pool properties: "DF_RC"\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + daos_prop_free(prop); + goto out_map_buf; } - svc_rf_entry = daos_prop_entry_get(*prop, DAOS_PROP_PO_SVC_REDUN_FAC); + svc_rf_entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_REDUN_FAC); D_ASSERT(svc_rf_entry != NULL); if (daos_prop_is_set(svc_rf_entry)) svc->ps_svc_rf = svc_rf_entry->dpe_val; @@ -1671,6 +1723,14 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, DP_UUID(svc->ps_uuid), svc_ops_enabled ? "enabled" : "disabled", rdb_size, rdb_size_ok ? 
">=" : "<", DUP_OP_MIN_RDB_SIZE, svc_ops_max, svc_ops_age); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + *map_buf_out = map_buf; + *map_version_out = map_version; + *prop_out = prop; + +out_map_buf: + if (rc != 0) + D_FREE(map_buf); out_lock: ABT_rwlock_unlock(svc->ps_lock); rdb_tx_end(&tx); @@ -1769,9 +1829,9 @@ pool_svc_check_node_status(struct pool_svc *svc) } while (0) static int pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *), void *arg); + void (*func)(void *), void *arg, bool for_chk); static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, - uint32_t map_version_for, bool sync_remove); + uint32_t map_version_for, bool sync_remove, bool for_chk); static void pool_svc_rfcheck_ult(void *arg); static int @@ -1779,7 +1839,7 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) { struct pool_svc *svc = pool_svc_obj(rsvc); struct pool_buf *map_buf = NULL; - uint32_t map_version; + uint32_t map_version = 0; uuid_t pool_hdl_uuid; uuid_t cont_hdl_uuid; daos_prop_t *prop = NULL; @@ -1832,7 +1892,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) * reconfigurations or the last MS notification. 
*/ svc->ps_force_notify = true; - rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */, + false /* for_chk */); if (rc == -DER_OP_CANCELED) { DL_INFO(rc, DF_UUID": not scheduling pool service reconfiguration", DP_UUID(svc->ps_uuid)); @@ -1842,7 +1903,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) goto out; } - rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, NULL /* arg */); + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, NULL /* arg */, + false /* for_chk */); if (rc != 0) { DL_ERROR(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); goto out; @@ -1930,7 +1992,7 @@ pool_svc_drain_cb(struct ds_rsvc *rsvc) } static int -pool_svc_map_dist_cb(struct ds_rsvc *rsvc) +pool_svc_map_dist_cb(struct ds_rsvc *rsvc, uint32_t *version) { struct pool_svc *svc = pool_svc_obj(rsvc); struct rdb_tx tx; @@ -1957,7 +2019,8 @@ pool_svc_map_dist_cb(struct ds_rsvc *rsvc) map_version); D_GOTO(out, rc); } - svc->ps_global_map_version = max(svc->ps_global_map_version, map_version); + + *version = map_version; out: if (map_buf != NULL) D_FREE(map_buf); @@ -2031,6 +2094,28 @@ pool_svc_put_leader(struct pool_svc *svc) ds_rsvc_put_leader(&svc->ps_rsvc); } +int +ds_pool_svc_lookup_leader(uuid_t uuid, struct ds_pool_svc **ds_svcp, struct rsvc_hint *hint) +{ + struct pool_svc *svc = NULL; + int rc; + + rc = pool_svc_lookup_leader(uuid, &svc, hint); + if (rc == 0) + *ds_svcp = pool_svc2ds(svc); + + return rc; +} + +void +ds_pool_svc_put_leader(struct ds_pool_svc *ds_svc) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + + if (svc != NULL) + ds_rsvc_put_leader(&svc->ps_rsvc); +} + /** Look up container service \a pool_uuid. 
*/ int ds_pool_cont_svc_lookup_leader(uuid_t pool_uuid, struct cont_svc **svcp, @@ -2135,7 +2220,7 @@ start_one(uuid_t uuid, void *varg) * Check if an RDB file exists, to avoid unnecessary error messages * from the ds_rsvc_start() call. */ - path = pool_svc_rdb_path(uuid); + path = ds_pool_svc_rdb_path(uuid); if (path == NULL) { D_ERROR(DF_UUID": failed to allocate rdb path\n", DP_UUID(uuid)); @@ -2151,8 +2236,8 @@ start_one(uuid_t uuid, void *varg) } d_iov_set(&id, uuid, sizeof(uuid_t)); - ds_rsvc_start(DS_RSVC_CLASS_POOL, &id, uuid, RDB_NIL_TERM, false /* create */, 0 /* size */, - NULL /* replicas */, NULL /* arg */); + ds_rsvc_start(DS_RSVC_CLASS_POOL, &id, uuid, RDB_NIL_TERM, DS_RSVC_START /* mode */, + 0 /* size */, NULL /* replicas */, NULL /* arg */); return 0; } @@ -2168,6 +2253,12 @@ pool_start_all(void *arg) DP_RC(rc)); } +int +ds_pool_start_with_svc(uuid_t uuid) +{ + return start_one(uuid, NULL); +} + /* Note that this function is currently called from the main xstream. */ int ds_pool_start_all(void) @@ -2238,7 +2329,7 @@ bcast_create(crt_context_t ctx, struct pool_svc *svc, crt_opcode_t opcode, crt_bulk_t bulk_hdl, crt_rpc_t **rpc) { return ds_pool_bcast_create(ctx, svc->ps_pool, DAOS_POOL_MODULE, opcode, - DAOS_POOL_VERSION, rpc, bulk_hdl, NULL); + DAOS_POOL_VERSION, rpc, bulk_hdl, NULL, NULL); } /** @@ -5541,6 +5632,29 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc, return rc; } +static int +ds_pool_mark_connectable_internal(struct rdb_tx *tx, struct pool_svc *svc) +{ + d_iov_t value; + uint32_t connectable = 0; + int rc; + + d_iov_set(&value, &connectable, sizeof(connectable)); + rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_connectable, &value); + if ((rc == 0 && connectable == 0) || rc == -DER_NONEXIST) { + connectable = 1; + rc = rdb_tx_update(tx, &svc->ps_root, &ds_pool_prop_connectable, &value); + if (rc == 0) + rc = 1; + } + + if (rc < 0) + D_ERROR("Failed to mark connectable of pool "DF_UUIDF": "DF_RC"\n", + 
DP_UUID(svc->ps_uuid), DP_RC(rc)); + + return rc; +} + static int __ds_pool_mark_upgrade_completed(uuid_t pool_uuid, struct pool_svc *svc, int rc) { @@ -5549,7 +5663,6 @@ __ds_pool_mark_upgrade_completed(uuid_t pool_uuid, struct pool_svc *svc, int rc) uint32_t upgrade_status; uint32_t global_version; uint32_t obj_version; - uint32_t connectable; int rc1; daos_prop_t *prop = NULL; @@ -5601,11 +5714,8 @@ __ds_pool_mark_upgrade_completed(uuid_t pool_uuid, struct pool_svc *svc, int rc) D_GOTO(out_tx, rc1); } - connectable = 1; - d_iov_set(&value, &connectable, sizeof(connectable)); - rc1 = rdb_tx_update(&tx, &svc->ps_root, &ds_pool_prop_connectable, - &value); - if (rc1) { + rc1 = ds_pool_mark_connectable_internal(&tx, svc); + if (rc1 < 0) { D_ERROR(DF_UUID": failed to set connectable of pool " "%d.\n", DP_UUID(pool_uuid), rc1); D_GOTO(out_tx, rc1); @@ -6553,13 +6663,18 @@ pool_svc_reconf_ult(void *varg) static int pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, void (*func)(void *), - void *arg) + void *arg, bool for_chk) { enum ds_rsvc_state state; int rc; D_DEBUG(DB_MD, DF_UUID": begin\n", DP_UUID(svc->ps_uuid)); + if (!for_chk && engine_in_check()) { + D_DEBUG(DB_MD, DF_UUID": end: skip in check mode\n", DP_UUID(svc->ps_uuid)); + return 0; + } + /* * Avoid scheduling when the PS is stepping down * and has already called sched_cancel_and_wait. @@ -6594,6 +6709,28 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, void (*fun return 0; } +/** + * Schedule PS reconfigurations (if necessary). This is currently for the chk + * module only. + */ +int +ds_pool_svc_schedule_reconf(struct ds_pool_svc *svc) +{ + struct pool_svc *s = pool_ds2svc(svc); + int rc; + + /* + * Pass 1 as map_version_for, since there shall be no other + * reconfiguration in progress. 
+ */ + rc = pool_svc_schedule_reconf(s, NULL /* map */, 1 /* map_version_for */, + false /* sync_remove */, true /* for_chk */); + if (rc != 0) + DL_ERROR(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(s->ps_uuid)); + return rc; +} + static int pool_find_all_targets_by_addr(struct pool_map *map, struct pool_target_addr_list *list, struct pool_target_id_list *tgt_list, @@ -6652,7 +6789,7 @@ pool_svc_rfcheck_ult(void *arg) */ static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, uint32_t map_version_for, - bool sync_remove) + bool sync_remove, bool for_chk) { struct pool_svc_reconf_arg *reconf_arg; uint32_t v; @@ -6690,7 +6827,7 @@ pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, uint32_t ma * If successful, this call passes the ownership of reconf_arg to * pool_svc_reconf_ult. */ - rc = pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult, reconf_arg); + rc = pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult, reconf_arg, false); if (rc != 0) { D_FREE(reconf_arg); return rc; @@ -6874,7 +7011,8 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, * Remove all undesired PS replicas (if any) before committing map, so * that the set of PS replicas remains a subset of the pool groups. 
*/ - rc = pool_svc_schedule_reconf(svc, map, 0 /* map_version_for */, true /* sync_remove */); + rc = pool_svc_schedule_reconf(svc, map, 0 /* map_version_for */, true /* sync_remove */, + false /* for_chk */); if (rc != 0) { DL_ERROR(rc, DF_UUID": failed to remove undesired pool service replicas", DP_UUID(svc->ps_uuid)); @@ -6906,14 +7044,15 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, ds_rsvc_request_map_dist(&svc->ps_rsvc); - rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */, + false /* for_chk */); if (rc != 0) DL_INFO(rc, DF_UUID": failed to schedule pool service reconfiguration", DP_UUID(svc->ps_uuid)); if (opc == MAP_EXCLUDE) { rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, - NULL /* arg */); + NULL /* arg */, false /* for_chk */); if (rc != 0) DL_INFO(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); } @@ -7947,7 +8086,7 @@ ds_pool_iv_ns_update(struct ds_pool *pool, unsigned int master_rank, } int -ds_pool_svc_global_map_version_get(uuid_t uuid, uint32_t *version) +ds_pool_svc_query_map_dist(uuid_t uuid, uint32_t *version, bool *idle) { struct pool_svc *svc; int rc; @@ -7956,7 +8095,7 @@ ds_pool_svc_global_map_version_get(uuid_t uuid, uint32_t *version) if (rc != 0) return rc; - *version = svc->ps_global_map_version; + ds_rsvc_query_map_dist(&svc->ps_rsvc, version, idle); pool_svc_put_leader(svc); return 0; @@ -8552,6 +8691,230 @@ ds_pool_lookup_hdl_cred(struct rdb_tx *tx, uuid_t pool_uuid, uuid_t pool_hdl_uui return rc; } +int +ds_pool_mark_connectable(struct ds_pool_svc *ds_svc) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + struct rdb_tx tx; + int rc; + + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc == 0) { + ABT_rwlock_wrlock(svc->ps_lock); + rc = ds_pool_mark_connectable_internal(&tx, svc); + if (rc > 0) + rc = 
rdb_tx_commit(&tx); + ABT_rwlock_unlock(svc->ps_lock); + rdb_tx_end(&tx); + } + + return rc; +} + +int +ds_pool_svc_load_map(struct ds_pool_svc *ds_svc, struct pool_map **map) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + struct rdb_tx tx = { 0 }; + int rc; + + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc == 0) { + ABT_rwlock_rdlock(svc->ps_lock); + rc = read_map(&tx, &svc->ps_root, map); + ABT_rwlock_unlock(svc->ps_lock); + rdb_tx_end(&tx); + } + + if (rc != 0) + D_ERROR("Failed to load pool map for pool "DF_UUIDF": "DF_RC"\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + + return rc; +} + +int +ds_pool_svc_flush_map(struct ds_pool_svc *ds_svc, struct pool_map *map) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + struct pool_buf *buf = NULL; + struct rdb_tx tx = { 0 }; + uint32_t version; + int rc = 0; + bool locked = false; + + version = pool_map_get_version(map); + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc != 0) { + D_ERROR("Failed to begin TX for flush pool "DF_UUIDF" map with version %u: " + DF_RC"\n", DP_UUID(svc->ps_uuid), version, DP_RC(rc)); + goto out; + } + + ABT_rwlock_wrlock(svc->ps_lock); + locked = true; + + rc = pool_buf_extract(map, &buf); + if (rc != 0) { + D_ERROR("Failed to extract buf for flush pool "DF_UUIDF" map with version %u: " + DF_RC"\n", DP_UUID(svc->ps_uuid), version, DP_RC(rc)); + goto out_lock; + } + + rc = write_map_buf(&tx, &svc->ps_root, buf, version); + if (rc != 0) { + D_ERROR("Failed to write buf for flush pool "DF_UUIDF" map with version %u: " + DF_RC"\n", DP_UUID(svc->ps_uuid), version, DP_RC(rc)); + goto out_buf; + } + + rc = rdb_tx_commit(&tx); + if (rc != 0) { + D_ERROR("Failed to commit TX for flush pool "DF_UUIDF" map with version %u: " + DF_RC"\n", DP_UUID(svc->ps_uuid), version, DP_RC(rc)); + goto out_buf; + } + + /* Update svc->ps_pool to match the new pool map. 
*/ + rc = ds_pool_tgt_map_update(svc->ps_pool, buf, version); + if (rc != 0) { + D_ERROR("Failed to refresh local pool "DF_UUIDF" map with version %u: " + DF_RC"\n", DP_UUID(svc->ps_uuid), version, DP_RC(rc)); + /* + * Have to resign to avoid handling future requests with stale pool map cache. + * Continue to distribute the new pool map to other pool shards since the RDB + * has already been updated. + */ + rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term); + } else { + ds_rsvc_request_map_dist(&svc->ps_rsvc); + ABT_rwlock_unlock(svc->ps_lock); + locked = false; + ds_rsvc_wait_map_dist(&svc->ps_rsvc); + } + +out_buf: + pool_buf_free(buf); +out_lock: + if (locked) + ABT_rwlock_unlock(svc->ps_lock); + rdb_tx_end(&tx); +out: + return rc; +} + +int +ds_pool_svc_update_label(struct ds_pool_svc *ds_svc, const char *label) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + daos_prop_t *prop = NULL; + struct rdb_tx tx = { 0 }; + int rc = 0; + + prop = daos_prop_alloc(1); + if (prop == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + prop->dpp_entries[0].dpe_type = DAOS_PROP_PO_LABEL; + if (label != NULL) { + D_STRNDUP(prop->dpp_entries[0].dpe_str, label, strlen(label)); + if (prop->dpp_entries[0].dpe_str == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } else { + prop->dpp_entries[0].dpe_flags = DAOS_PROP_ENTRY_NOT_SET; + prop->dpp_entries[0].dpe_str = NULL; + } + + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc != 0) { + D_ERROR("Failed to begin TX for updating pool "DF_UUIDF" label %s: "DF_RC"\n", + DP_UUID(svc->ps_uuid), label != NULL ? label : "(null)", DP_RC(rc)); + D_GOTO(out, rc); + } + + ABT_rwlock_wrlock(svc->ps_lock); + + rc = pool_prop_write(&tx, &svc->ps_root, prop); + if (rc != 0) { + D_ERROR("Failed to updating pool "DF_UUIDF" label %s: "DF_RC"\n", + DP_UUID(svc->ps_uuid), label != NULL ? 
label : "(null)", DP_RC(rc)); + D_GOTO(out_lock, rc); + } + + rc = rdb_tx_commit(&tx); + if (rc != 0) + D_ERROR("Failed to commit TX for updating pool "DF_UUIDF" label %s: "DF_RC"\n", + DP_UUID(svc->ps_uuid), label != NULL ? label : "(null)", DP_RC(rc)); + +out_lock: + ABT_rwlock_unlock(svc->ps_lock); + rdb_tx_end(&tx); +out: + daos_prop_free(prop); + return rc; +} + +int +ds_pool_svc_evict_all(struct ds_pool_svc *ds_svc) +{ + struct pool_svc *svc = pool_ds2svc(ds_svc); + struct pool_metrics *metrics; + uuid_t *hdl_uuids = NULL; + struct rdb_tx tx = { 0 }; + size_t hdl_uuids_size = 0; + int n_hdl_uuids = 0; + int rc = 0; + + rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx); + if (rc != 0) { + D_ERROR("Failed to begin TX for evict pool "DF_UUIDF" connections: "DF_RC"\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + D_GOTO(out, rc); + } + + ABT_rwlock_wrlock(svc->ps_lock); + + rc = find_hdls_to_evict(&tx, svc, &hdl_uuids, &hdl_uuids_size, &n_hdl_uuids, NULL); + if (rc != 0) { + D_ERROR("Failed to find hdls for evict pool "DF_UUIDF" connections: "DF_RC"\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + D_GOTO(out_lock, rc); + } + + if (n_hdl_uuids > 0) { + rc = pool_disconnect_hdls(&tx, svc, hdl_uuids, n_hdl_uuids, + dss_get_module_info()->dmi_ctx); + if (rc != 0) + D_GOTO(out_lock, rc); + + metrics = svc->ps_pool->sp_metrics[DAOS_POOL_MODULE]; + d_tm_inc_counter(metrics->evict_total, n_hdl_uuids); + rc = rdb_tx_commit(&tx); + if (rc != 0) + D_ERROR("Failed to commit TX for evict pool "DF_UUIDF" connections: " + DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); + } + +out_lock: + D_FREE(hdl_uuids); + ABT_rwlock_unlock(svc->ps_lock); + rdb_tx_end(&tx); +out: + return rc; +} + +struct ds_pool * +ds_pool_svc2pool(struct ds_pool_svc *ds_svc) +{ + return pool_ds2svc(ds_svc)->ps_pool; +} + +struct cont_svc * +ds_pool_ps2cs(struct ds_pool_svc *ds_svc) +{ + return pool_ds2svc(ds_svc)->ps_cont_svc; +} + /* Upgrade the VOS pool of a pool service replica (if any). 
*/ int ds_pool_svc_upgrade_vos_pool(struct ds_pool *pool) diff --git a/src/pool/srv_pool_check.c b/src/pool/srv_pool_check.c new file mode 100644 index 00000000000..e88f4667b9e --- /dev/null +++ b/src/pool/srv_pool_check.c @@ -0,0 +1,880 @@ +/* + * (C) Copyright 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * \file + * + * ds_pool: Pool Service Check + */ + +#define D_LOGFAC DD_FAC(pool) + +#include + +#include +#include +#include +#include +#include "srv_internal.h" +#include "srv_layout.h" + +static int +pool_glance(uuid_t uuid, char *path, struct ds_pool_clue *clue_out) +{ + struct rdb_storage *storage; + struct ds_pool_svc_clue clue; + struct rdb_tx tx; + rdb_path_t root; + uint32_t global_version; + struct pool_buf *map_buf; + d_iov_t value; + int rc; + + D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); + + rc = rdb_open(path, uuid, RDB_NIL_TERM, NULL /* cbs */, NULL /* arg */, &storage); + if (rc != 0) { + D_ERROR(DF_UUID": failed to open %s: "DF_RC"\n", DP_UUID(uuid), path, DP_RC(rc)); + goto out; + } + + rc = rdb_glance(storage, &clue.psc_db_clue); + if (rc != 0) { + D_ERROR(DF_UUID": failed to glance at %s: "DF_RC"\n", DP_UUID(uuid), path, + DP_RC(rc)); + goto out_storage; + } + + rc = rdb_tx_begin_local(storage, &tx); + if (rc != 0) + goto out_db_clue; + + rc = rdb_path_init(&root); + if (rc != 0) + goto out_tx; + rc = rdb_path_push(&root, &rdb_path_root_key); + if (rc != 0) + goto out_root; + + d_iov_set(&value, NULL, 0); + rc = rdb_tx_lookup(&tx, &root, &ds_pool_prop_label, &value); + if (rc == 0) { + if (value.iov_len > DAOS_PROP_LABEL_MAX_LEN) { + /* Hit local data corruption. 
*/ + D_ERROR("Bad label length for pool "DF_UUID "%zu (> %d).\n", + DP_UUID(uuid), value.iov_len, DAOS_PROP_LABEL_MAX_LEN); + D_GOTO(out_root, rc = -DER_IO); + } + + if (strncmp(DAOS_PROP_NO_PO_LABEL, value.iov_buf, DAOS_PROP_LABEL_MAX_LEN) == 0) { + clue_out->pc_label_len = 0; + } else { + D_STRNDUP(clue_out->pc_label, value.iov_buf, value.iov_len); + if (clue_out->pc_label == NULL) + D_GOTO(out_root, rc = -DER_NOMEM); + + clue_out->pc_label_len = value.iov_len; + } + } else if (rc == -DER_NONEXIST) { + clue_out->pc_label_len = 0; + } else { + goto out_root; + } + + rc = ds_pool_svc_load(&tx, uuid, &root, &global_version, &map_buf, &clue.psc_map_version); + if (rc == 0) { + D_FREE(map_buf); + } else if (rc == DER_UNINIT) { + clue.psc_map_version = 0; + rc = 0; + } else if (rc != 0) { + goto out_label; + } + + memcpy(clue_out->pc_svc_clue, &clue, sizeof(clue)); +out_label: + if (rc != 0) { + D_FREE(clue_out->pc_label); + clue_out->pc_label_len = 0; + } +out_root: + rdb_path_fini(&root); +out_tx: + rdb_tx_end(&tx); +out_db_clue: + if (rc != 0) + d_rank_list_free(clue.psc_db_clue.bcl_replicas); +out_storage: + rdb_close(storage); +out: + return rc; +} + +/** + * Glance at the pool with \a uuid in \a dir, and report \a clue about its + * persistent state. Note that if an error has occurred, it is reported in \a + * clue->pc_rc, with \a clue->pc_uuid and \a clue->pc_dir fields also being + * valid. + * + * \param[in] uuid pool UUID + * \param[in] dir storage directory + * \param[out] clue pool clue + */ +void +ds_pool_clue_init(uuid_t uuid, enum ds_pool_dir dir, struct ds_pool_clue *clue) +{ + char *path = NULL; + char *file = NULL; + struct stat st; + int rc; + int i; + + memset(clue, 0, sizeof(*clue)); + uuid_copy(clue->pc_uuid, uuid); + clue->pc_rank = dss_self_rank(); + clue->pc_dir = dir; + + /* + * Only glance at pool services in the normal directory for simplicity. 
+ */ + if (dir != DS_POOL_DIR_NORMAL) { + rc = 0; + goto out; + } + + /* "pc_tgt_nr < 0" means failed to load some vos target. */ + clue->pc_tgt_nr = -1; + + if (dss_self_rank() == daos_fail_value_get() && DAOS_FAIL_CHECK(DAOS_CHK_FAIL_REPORT_POOL1)) + D_GOTO(out, rc = -DER_NOMEM); + + D_ALLOC_ARRAY(clue->pc_tgt_status, dss_tgt_nr); + if (clue->pc_tgt_status == NULL) { + D_ERROR(DF_UUIDF": failed to allocate service clue for shards status\n", + DP_UUID(uuid)); + D_GOTO(out, rc = -DER_NOMEM); + } + + for (i = 0; i < dss_tgt_nr; i++) { + rc = ds_mgmt_tgt_file(uuid, VOS_FILE, &i, &file); + if (file == NULL) { + D_ERROR(DF_UUIDF": failed to allocate file name for shards status %d\n", + DP_UUID(uuid), i); + D_GOTO(out, rc = -DER_NOMEM); + } + + rc = stat(file, &st); + D_FREE(file); + if (rc != 0) { + if (errno == ENOENT) { + clue->pc_tgt_status[i] = DS_POOL_TGT_NONEXIST; + } else { + rc = daos_errno2der(errno); + D_ERROR(DF_UUIDF": failed to stat target %d: %d\n", + DP_UUID(uuid), i, rc); + D_GOTO(out, rc); + } + } else { + if (st.st_size > 0) + clue->pc_tgt_status[i] = DS_POOL_TGT_NORMAL; + else + clue->pc_tgt_status[i] = DS_POOL_TGT_EMPTY; + } + } + + /* Set pc_tgt_nr as the right value if all vos targets are loaded successfully. */ + clue->pc_tgt_nr = dss_tgt_nr; + + if (dss_self_rank() == daos_fail_value_get() && DAOS_FAIL_CHECK(DAOS_CHK_FAIL_REPORT_POOL2)) + D_GOTO(out, rc = -DER_NOMEM); + + path = ds_pool_svc_rdb_path(uuid); + if (path == NULL) { + D_ERROR(DF_UUID": failed to allocate RDB path\n", DP_UUID(uuid)); + rc = -DER_NOMEM; + goto out; + } + + rc = stat(path, &st); + if (rc != 0) { + rc = errno; + if (rc == ENOENT) { + /* Not a pool service replica. 
*/ + rc = 0; + } else { + D_ERROR(DF_UUID": failed to stat %s: %d\n", DP_UUID(uuid), path, rc); + rc = daos_errno2der(rc); + } + goto out_path; + } + + D_ALLOC(clue->pc_svc_clue, sizeof(*clue->pc_svc_clue)); + if (clue->pc_svc_clue == NULL) { + D_ERROR(DF_UUID": failed to allocate service clue\n", DP_UUID(uuid)); + rc = -DER_NOMEM; + goto out_path; + } + + rc = pool_glance(uuid, path, clue); + if (rc != 0) { + D_ERROR(DF_UUID": failed to glance pool service: "DF_RC"\n", DP_UUID(uuid), + DP_RC(rc)); + D_FREE(clue->pc_svc_clue); + } + +out_path: + D_FREE(path); +out: + if (clue->pc_svc_clue != NULL) { + rc = 1; + } else { + D_ASSERT(rc <= 0); + if (rc < 0 && clue->pc_tgt_nr < 0) + D_FREE(clue->pc_tgt_status); + } + + clue->pc_rc = rc; +} + +/** + * Finalize \a clue that was initialized by ds_pool_clue_init. + * + * \param[in,out] clue pool clue + */ +void +ds_pool_clue_fini(struct ds_pool_clue *clue) +{ + if (clue->pc_svc_clue != NULL) { + d_rank_list_free(clue->pc_svc_clue->psc_db_clue.bcl_replicas); + D_FREE(clue->pc_svc_clue); + } + D_FREE(clue->pc_label); + D_FREE(clue->pc_tgt_status); +} + +/* Argument for glance_at_one */ +struct glance_arg { + ds_pool_clues_init_filter_t ga_filter; + void *ga_filter_arg; + enum ds_pool_dir ga_dir; + struct ds_pool_clues ga_clues; +}; + +static int +glance_at_one(uuid_t uuid, void *varg) +{ + struct glance_arg *arg = varg; + struct ds_pool_clues *clues = &arg->ga_clues; + int phase = 0; + int rc; + + if (arg->ga_filter != NULL) { + rc = arg->ga_filter(uuid, arg->ga_filter_arg, &phase); + if (rc != 0) + goto out; + } + + if (clues->pcs_cap < clues->pcs_len + 1) { + int new_cap = clues->pcs_cap; + struct ds_pool_clue *new_array; + + /* Double the capacity. 
*/ + if (new_cap == 0) + new_cap = 1; + else + new_cap *= 2; + D_REALLOC_ARRAY(new_array, clues->pcs_array, clues->pcs_cap, new_cap); + if (new_array == NULL) { + D_ERROR(DF_UUID": failed to reallocate clues array\n", DP_UUID(uuid)); + goto out; + } + clues->pcs_array = new_array; + clues->pcs_cap = new_cap; + } + + ds_pool_clue_init(uuid, arg->ga_dir, &clues->pcs_array[clues->pcs_len]); + clues->pcs_array[clues->pcs_len].pc_phase = phase; + clues->pcs_len++; + +out: + /* Always return 0 to continue scanning other pools. */ + return 0; +} + +/** + * Finalize \a clues that was initialized by ds_pool_clues_init. + * + * \param[in,out] clues pool clues + */ +void +ds_pool_clues_fini(struct ds_pool_clues *clues) +{ + int i; + + if (clues != NULL && clues->pcs_array != NULL) { + for (i = 0; i < clues->pcs_len; i++) + ds_pool_clue_fini(&clues->pcs_array[i]); + + D_FREE(clues->pcs_array); + } +} + +/** + * Scan local pools and glance at (i.e., call ds_pool_clue_init on) those for + * which \a filter returns 0. If \a filter is NULL, all local pools will be + * glanced at. Must be called on the system xstream when all local pools are + * stopped. If successfully initialized, \a clues must be finalized with + * ds_pool_clues_fini eventually. 
+ * + * \param[in] filter optional filter callback + * \param[in] filter_arg optional argument for \a filter + * \param[out] clues_out pool clues + */ +int +ds_pool_clues_init(ds_pool_clues_init_filter_t filter, void *filter_arg, + struct ds_pool_clues *clues_out) +{ + struct glance_arg arg = {0}; + int rc; + + arg.ga_filter = filter; + arg.ga_filter_arg = filter_arg; + + arg.ga_dir = DS_POOL_DIR_NORMAL; + rc = ds_mgmt_tgt_pool_iterate(glance_at_one, &arg); + if (rc != 0) { + D_ERROR("failed to glance at local pools: "DF_RC"\n", DP_RC(rc)); + goto err_clues; + } + + arg.ga_dir = DS_POOL_DIR_NEWBORN; + rc = ds_mgmt_newborn_pool_iterate(glance_at_one, &arg); + if (rc != 0) { + D_ERROR("failed to glance at local new born pools: "DF_RC"\n", DP_RC(rc)); + goto err_clues; + } + + arg.ga_dir = DS_POOL_DIR_ZOMBIE; + rc = ds_mgmt_zombie_pool_iterate(glance_at_one, &arg); + if (rc != 0) { + D_ERROR("failed to glance at local zombie pools: "DF_RC"\n", DP_RC(rc)); + goto err_clues; + } + + *clues_out = arg.ga_clues; + return 0; + +err_clues: + ds_pool_clues_fini(&arg.ga_clues); + return rc; +} + +/* For testing purposes... */ +void +ds_pool_clues_print(struct ds_pool_clues *clues) +{ + struct ds_pool_svc_clue svc_clue = {0}; + int i; + + for (i = 0; i < clues->pcs_len; i++) { + struct ds_pool_clue *c = &clues->pcs_array[i]; + struct ds_pool_svc_clue *sc = c->pc_svc_clue == NULL ?
&svc_clue : + c->pc_svc_clue; + struct rdb_clue *dc = &sc->psc_db_clue; + + D_PRINT("pool clue %d:\n" + " uuid "DF_UUID"\n" + " rank %u\n" + " dir %d\n" + " rc %d\n" + " map_version %u\n" + " term "DF_U64"\n" + " vote %d\n" + " self %u\n" + " last_index "DF_U64"\n" + " last_term "DF_U64"\n" + " base_index "DF_U64"\n" + " base_term "DF_U64"\n" + " n_replicas %u\n" + " oid_next "DF_U64"\n", + i, DP_UUID(c->pc_uuid), c->pc_rank, c->pc_dir, c->pc_rc, + sc->psc_map_version, dc->bcl_term, dc->bcl_vote, dc->bcl_self, + dc->bcl_last_index, dc->bcl_last_term, dc->bcl_base_index, + dc->bcl_base_term, dc->bcl_replicas == NULL ? 0 : dc->bcl_replicas->rl_nr, + dc->bcl_oid_next); + } +} + +int +ds_pool_clues_find_rank(struct ds_pool_clues *clues, d_rank_t rank) +{ + int i; + + for (i = 0; i < clues->pcs_len; i++) + if (clues->pcs_array[i].pc_rank == rank) + return i; + return -DER_NONEXIST; +} + +/* + * Return + * + * > 0 is newer than + * < 0 is older than + * = 0 is equal to + */ +static int +compare_logs(uint64_t x_last_term, uint64_t x_last_index, + uint64_t y_last_term, uint64_t y_last_index) +{ + if (x_last_term > y_last_term) + return 1; + if (x_last_term < y_last_term) + return -1; + + if (x_last_index > y_last_index) + return 1; + if (x_last_index < y_last_index) + return -1; + + return 0; +} + +/** + * Analyze \a clues, which must be nonempty and comprise clues about replicas + * of one PS, and report if this PS requires catastrophic recovery or not. + * + * \param[in] clues pool clues for one PS + * \param[out] advice_out when the return value is >0, the index of the + * advised replica in \a clues to rebootstrap the + * PS from. For return zero case, it is the index + * of the replica that can be PS leader candidate. 
+ * + * \return 0 this PS does not require catastrophic recovery + * >0 the caller is advised to rebootstrap this PS from the + * replica at index \a *advice_out in \a clues + */ +int +ds_pool_check_svc_clues(struct ds_pool_clues *clues, int *advice_out) +{ + uuid_t uuid; + uint32_t map_version = 0; + uint64_t log_term = 0; + uint64_t log_index = 0; + int advice = -1; + int i; + + /* Assert that all clues are about replicas of the same PS. */ + D_ASSERT(clues->pcs_len > 0); + uuid_copy(uuid, clues->pcs_array[0].pc_uuid); + for (i = 0; i < clues->pcs_len; i++) { + struct ds_pool_clue *clue = &clues->pcs_array[i]; + + D_ASSERT(uuid_compare(uuid, clue->pc_uuid) == 0); + D_ASSERTF(clue->pc_rc > 0, DF_RC"\n", DP_RC(clue->pc_rc)); + D_ASSERT(clue->pc_svc_clue != NULL); + } + + /* For each replica, see if it can get votes from a majority. */ + for (i = 0; i < clues->pcs_len; i++) { + struct ds_pool_clue *clue = &clues->pcs_array[i]; + struct ds_pool_svc_clue *svc_clue = clue->pc_svc_clue; + struct rdb_clue *db_clue = &svc_clue->psc_db_clue; + int n_votes = 0; + int j; + + /* This replica must be a voting node itself. */ + if (!d_rank_list_find(db_clue->bcl_replicas, db_clue->bcl_self, NULL /* idx */)) + continue; + + /* + * Check each replica in the local membership and count the + * number of votes this replica can get. + */ + for (j = 0; j < db_clue->bcl_replicas->rl_nr; j++) { + struct rdb_clue *c; + int k; + + /* + * Find the member, which may be missing. If it is + * ourself, the log comparison will pass. + */ + k = ds_pool_clues_find_rank(clues, db_clue->bcl_replicas->rl_ranks[j]); + if (k < 0) + continue; + c = &clues->pcs_array[k].pc_svc_clue->psc_db_clue; + + /* + * Since terms will grow as replicas communicate with + * each other, we only compare the logs. 
+ */ + if (compare_logs(db_clue->bcl_last_term, db_clue->bcl_last_index, + c->bcl_last_term, c->bcl_last_index) < 0) + continue; + + n_votes++; + } + + D_DEBUG(DB_MD, DF_UUID": rank %u: %d/%u votes\n", DP_UUID(uuid), + db_clue->bcl_self, n_votes, db_clue->bcl_replicas->rl_nr); + + if (n_votes > db_clue->bcl_replicas->rl_nr / 2) { + /* Replica @i can be as PS leader candidate. */ + *advice_out = i; + return 0; + } + } + + /* + * No replica can become a leader. Find out which replica among those + * who have the newest pool map version has the newest log. + */ + for (i = 0; i < clues->pcs_len; i++) { + struct ds_pool_clue *clue = &clues->pcs_array[i]; + struct ds_pool_svc_clue *svc_clue = clue->pc_svc_clue; + struct rdb_clue *db_clue = &svc_clue->psc_db_clue; + + /* Track who has the newest pool map version. */ + if (svc_clue->psc_map_version > map_version) { + map_version = svc_clue->psc_map_version; + log_term = db_clue->bcl_last_term; + log_index = db_clue->bcl_last_index; + advice = i; + } else if (svc_clue->psc_map_version == map_version) { + /* + * Track who among those with this map version has the + * newest log. + */ + if (compare_logs(db_clue->bcl_last_term, db_clue->bcl_last_index, + log_term, log_index) > 0) { + log_term = db_clue->bcl_last_term; + log_index = db_clue->bcl_last_index; + advice = i; + } + } + } + D_ASSERTF(advice >= 0 && advice < clues->pcs_len, "%d\n", advice); + *advice_out = advice; + return 1; +} + +#if 0 /* TODO: Adapt these tests to some new test framework. */ +/* Test compare_logs. 
*/ +void +ds_pool_test_compare_logs(void) +{ + D_ASSERT(compare_logs(2, 2, 1, 2) > 0); /* term > */ + D_ASSERT(compare_logs(1, 2, 2, 1) < 0); /* term < */ + D_ASSERT(compare_logs(1, 4, 1, 3) > 0); /* term ==, index > */ + D_ASSERT(compare_logs(1, 2, 1, 3) < 0); /* term ==, index < */ + D_ASSERT(compare_logs(1, 2, 1, 2) == 0); /* term ==, index == */ +} + +static d_rank_t test_ranks[] = {0, 1, 2, 3, 4}; +static struct ds_pool_svc_clue test_svc_clues[ARRAY_SIZE(test_ranks)]; +static struct ds_pool_clue test_clues[ARRAY_SIZE(test_ranks)]; + +/* Test ds_pool_check_svc_clues. */ +void +ds_pool_test_check_svc_clues(void) +{ + uuid_t uuid; + int i; + int rc; + + /* + * Initialize test data that can't be initialized at compile time. + * Every test_svc_clues[i] corresponds to test_clues[i]. Each subtest + * must set the following fields. + * + * test_svc_clue[i].psc_db_clue.bcl_replicas + * test_svc_clue[i].psc_db_clue.bcl_last_index + * test_svc_clue[i].psc_db_clue.bcl_last_term + * test_svc_clue[i].psc_map_version + */ + uuid_generate(uuid); + for (i = 0; i < ARRAY_SIZE(test_clues); i++) { + uuid_copy(test_clues[i].pc_uuid, uuid); + test_clues[i].pc_rank = i; + test_clues[i].pc_dir = DS_POOL_DIR_NORMAL; + test_clues[i].pc_svc_clue = &test_svc_clues[i]; + } + for (i = 0; i < ARRAY_SIZE(test_svc_clues); i++) + test_svc_clues[i].psc_db_clue.bcl_self = i; + + /* Test a single replica that does not require CR. 
*/ + { + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = 1, + .pcs_cap = 1 + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = 1 + }; + int advice = -1; + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == -1, "%d\n", advice); + } + + /* + * Test a single replica that has not itself but a missing replica in + * the membership and requires CR. + */ + { + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = 1, + .pcs_cap = 1 + }; + d_rank_list_t replicas = { + .rl_ranks = &test_ranks[1], + .rl_nr = 1 + }; + int advice = -1; + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc > 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == 0, "%d\n", advice); + } + + /* Test a complete set of replicas that do not require CR. 
*/ + { + const int n = 3; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = n, + .pcs_cap = n + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = n + }; + int advice = -1; + + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[1].psc_db_clue.bcl_last_index = 9; + test_svc_clues[1].psc_db_clue.bcl_last_term = 1; + test_svc_clues[1].psc_map_version = 1; + + test_svc_clues[2].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[2].psc_db_clue.bcl_last_index = 9; + test_svc_clues[2].psc_db_clue.bcl_last_term = 1; + test_svc_clues[2].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == -1, "%d\n", advice); + } + + /* Test an incomplete but sufficient set of replicas that do not require CR. 
*/ + { + const int m = 2; + const int n = 3; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = m, + .pcs_cap = m + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = n + }; + int advice = -1; + + D_ASSERT(m <= ARRAY_SIZE(test_ranks)); + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + D_ASSERT(m < n); + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[1].psc_db_clue.bcl_last_index = 9; + test_svc_clues[1].psc_db_clue.bcl_last_term = 1; + test_svc_clues[1].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == -1, "%d\n", advice); + } + + /* + * Test a complete (for at least one replica) but insufficient set of + * replicas that require CR. + */ + { + const int n = 3; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = n, + .pcs_cap = n + }; + d_rank_list_t replicas_0; + d_rank_list_t replicas_1; + d_rank_list_t replicas_2; + int advice = -1; + + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + + /* Unable to get votes from {1, 2} in {0, 1, 2}. */ + replicas_0.rl_ranks = test_ranks; + replicas_0.rl_nr = n; + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas_0; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + /* Unable to get votes from {2} in {1, 2}. */ + replicas_1.rl_ranks = &test_ranks[1]; + replicas_1.rl_nr = n - 1; + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas_1; + test_svc_clues[1].psc_db_clue.bcl_last_index = 10; + test_svc_clues[1].psc_db_clue.bcl_last_term = 1; + test_svc_clues[1].psc_map_version = 1; + + /* Unable to get votes from absent {3} in {2, 3}. 
*/ + D_ASSERT(ARRAY_SIZE(test_ranks) >= 4); + replicas_2.rl_ranks = &test_ranks[2]; + replicas_2.rl_nr = 2; + test_svc_clues[2].psc_db_clue.bcl_replicas = &replicas_2; + test_svc_clues[2].psc_db_clue.bcl_last_index = 11; + test_svc_clues[2].psc_db_clue.bcl_last_term = 1; + test_svc_clues[2].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc > 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == 2, "%d\n", advice); + } + + /* Test an insufficient set of replicas that require CR: case 1. */ + { + const int m = 2; + const int n = 5; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = m, + .pcs_cap = m + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = n + }; + int advice = -1; + + D_ASSERT(m <= ARRAY_SIZE(test_ranks)); + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + D_ASSERT(m < n); + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 9; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 1; + + /* A newer map version and a newer log. */ + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[1].psc_db_clue.bcl_last_index = 11; + test_svc_clues[1].psc_db_clue.bcl_last_term = 1; + test_svc_clues[1].psc_map_version = 2; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc > 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == 1, "%d\n", advice); + } + + /* Test an insufficient set of replicas that require CR: case 2. */ + { + const int m = 2; + const int n = 5; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = m, + .pcs_cap = m + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = n + }; + int advice = -1; + + D_ASSERT(m <= ARRAY_SIZE(test_ranks)); + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + D_ASSERT(m < n); + + /* A newer map version. 
*/ + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 11; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 2; + + /* A newer log. */ + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[1].psc_db_clue.bcl_last_index = 10; + test_svc_clues[1].psc_db_clue.bcl_last_term = 2; + test_svc_clues[1].psc_map_version = 1; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc > 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == 0, "%d\n", advice); + } + + /* Test an insufficient set of replicas that require CR: case 3. */ + { + const int m = 2; + const int n = 5; + struct ds_pool_clues clues = { + .pcs_array = test_clues, + .pcs_len = m, + .pcs_cap = m + }; + d_rank_list_t replicas = { + .rl_ranks = test_ranks, + .rl_nr = n + }; + int advice = -1; + + D_ASSERT(m <= ARRAY_SIZE(test_ranks)); + D_ASSERT(n <= ARRAY_SIZE(test_ranks)); + D_ASSERT(m < n); + + test_svc_clues[0].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[0].psc_db_clue.bcl_last_index = 11; + test_svc_clues[0].psc_db_clue.bcl_last_term = 1; + test_svc_clues[0].psc_map_version = 2; + + /* The same map version but a newer log. 
*/ + test_svc_clues[1].psc_db_clue.bcl_replicas = &replicas; + test_svc_clues[1].psc_db_clue.bcl_last_index = 10; + test_svc_clues[1].psc_db_clue.bcl_last_term = 2; + test_svc_clues[1].psc_map_version = 2; + + rc = ds_pool_check_svc_clues(&clues, &advice); + D_ASSERTF(rc > 0, DF_RC"\n", DP_RC(rc)); + D_ASSERTF(advice == 1, "%d\n", advice); + } +} +#endif diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index b716f8bfc11..2a6c4647598 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -185,6 +185,9 @@ start_gc_ult(struct ds_pool_child *child) D_ASSERT(child != NULL); D_ASSERT(child->spc_gc_req == NULL); + D_DEBUG(DB_MGMT, DF_UUID"[%d]: starting GC ULT\n", + DP_UUID(child->spc_uuid), dmi->dmi_tgt_id); + sched_req_attr_init(&attr, SCHED_REQ_GC, &child->spc_uuid); attr.sra_flags = SCHED_REQ_FL_NO_DELAY; @@ -470,47 +473,62 @@ pool_child_start(struct ds_pool_child *child, bool recreate) D_FREE(path); - if (rc) { - DL_CDEBUG(rc == -DER_NVME_IO, DB_MGMT, DLOG_ERR, rc, - DF_UUID": Open VOS pool failed.", DP_UUID(child->spc_uuid)); - goto out; + if (rc != 0) { + if (rc != -DER_NONEXIST) { + DL_CDEBUG(rc == -DER_NVME_IO, DB_MGMT, DLOG_ERR, rc, + DF_UUID": Open VOS pool failed.", DP_UUID(child->spc_uuid)); + goto out; + } + + D_WARN("Lost pool "DF_UUIDF" shard %u on rank %u.\n", + DP_UUID(child->spc_uuid), info->dmi_tgt_id, dss_self_rank()); + /* + * Ignore the failure to allow subsequent logic (such as DAOS check) + * to handle the trouble. 
+ */ + child->spc_no_storage = 1; + goto done; } - rc = start_gc_ult(child); - if (rc != 0) - goto out_close; + if (!engine_in_check()) { + rc = start_gc_ult(child); + if (rc != 0) + goto out_close; - rc = start_flush_ult(child); - if (rc != 0) - goto out_gc; + rc = start_flush_ult(child); + if (rc != 0) + goto out_gc; - rc = ds_start_chkpt_ult(child); - if (rc != 0) - goto out_flush; + rc = ds_start_scrubbing_ult(child); + if (rc != 0) + goto out_flush; + } - rc = ds_start_scrubbing_ult(child); + rc = ds_start_chkpt_ult(child); if (rc != 0) - goto out_chkpt; + goto out_scrub; /* Start all containers */ rc = ds_cont_child_start_all(child); if (rc) goto out_cont; +done: *child->spc_state = POOL_CHILD_STARTED; return 0; out_cont: ds_cont_child_stop_all(child); - ds_stop_scrubbing_ult(child); -out_chkpt: ds_stop_chkpt_ult(child); +out_scrub: + ds_stop_scrubbing_ult(child); out_flush: stop_flush_ult(child); out_gc: stop_gc_ult(child); out_close: - vos_pool_close(child->spc_hdl); + if (likely(!child->spc_no_storage)) + vos_pool_close(child->spc_hdl); out: *child->spc_state = POOL_CHILD_NEW; return rc; @@ -568,11 +586,16 @@ pool_child_stop(struct ds_pool_child *child) D_DEBUG(DB_MGMT, DF_UUID": Stopping pool child.\n", DP_UUID(child->spc_uuid)); *child->spc_state = POOL_CHILD_STOPPING; + + if (unlikely(child->spc_no_storage)) + goto wait; + /* First stop all the ULTs who might need to hold ds_pool_child (or ds_cont_child) */ ds_cont_child_stop_all(child); D_ASSERT(d_list_empty(&child->spc_cont_list)); ds_stop_scrubbing_ult(child); +wait: /* Wait for all references dropped */ if (child->spc_ref > 0) { D_DEBUG(DB_MGMT, DF_UUID": Wait on pool child refs (%d) dropping.\n", @@ -583,6 +606,9 @@ pool_child_stop(struct ds_pool_child *child) } D_DEBUG(DB_MGMT, DF_UUID": Pool child refs dropped.\n", DP_UUID(child->spc_uuid)); + if (unlikely(child->spc_no_storage)) + goto done; + /* Stop all pool child owned ULTs which doesn't hold ds_pool_child reference */ 
ds_stop_chkpt_ult(child); D_DEBUG(DB_MGMT, DF_UUID": Checkpoint ULT stopped.\n", DP_UUID(child->spc_uuid)); @@ -594,6 +620,7 @@ pool_child_stop(struct ds_pool_child *child) vos_pool_close(child->spc_hdl); child->spc_hdl = DAOS_HDL_INVAL; +done: D_DEBUG(DB_MGMT, DF_UUID": Pool child stopped.\n", DP_UUID(child->spc_uuid)); *child->spc_state = POOL_CHILD_NEW; return 0; @@ -1093,6 +1120,105 @@ pool_fetch_hdls_ult_abort(struct ds_pool *pool) D_INFO(DF_UUID": fetch hdls ULT aborted\n", DP_UUID(pool->sp_uuid)); } +static int +ds_pool_chk_post_one(void *varg) +{ + struct pool_child_lookup_arg *arg = varg; + struct ds_pool_child *child = NULL; + int rc = 0; + + /* The pool shard must has been opened. */ + child = ds_pool_child_lookup(arg->pla_uuid); + if (child == NULL) + D_GOTO(out, rc = -DER_NONEXIST); + + D_ASSERT(*child->spc_state == POOL_CHILD_STARTED); + + if (unlikely(child->spc_no_storage)) + D_GOTO(out, rc = 0); + + rc = start_gc_ult(child); + if (rc != 0) + goto out; + + rc = start_flush_ult(child); + if (rc != 0) + goto out; + + rc = ds_start_scrubbing_ult(child); + if (rc != 0) + goto out; + + rc = ds_cont_chk_post(child); + +out: + if (child != NULL) { + if (rc != 0) { + ds_stop_scrubbing_ult(child); + stop_flush_ult(child); + stop_gc_ult(child); + } + + ds_pool_child_put(child); + } + + return rc; +} + +int +ds_pool_chk_post(uuid_t uuid) +{ + struct ds_pool *pool = NULL; + struct daos_llink *llink = NULL; + struct pool_child_lookup_arg collective_arg = { 0 }; + int rc = 0; + + D_ASSERT(engine_in_check()); + D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); + + D_DEBUG(DB_MGMT, "Post handle pool starting for "DF_UUIDF" after DAOS check: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); + + /* The pool must has been opened. 
*/ + rc = daos_lru_ref_hold(pool_cache, (void *)uuid, sizeof(uuid_t), + NULL /* create_args */, &llink); + if (rc != 0) + goto out; + + pool = pool_obj(llink); + if (pool->sp_stopping) + D_GOTO(out, rc = -DER_SHUTDOWN); + + pool->sp_fetch_hdls = 1; + pool_fetch_hdls_ult(pool); + + rc = ds_pool_start_ec_eph_query_ult(pool); + if (rc != 0) { + D_ERROR(DF_UUID": failed to start ec eph query ult: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); + goto out; + } + + collective_arg.pla_uuid = uuid; + rc = dss_thread_collective(ds_pool_chk_post_one, &collective_arg, 0); + +out: + if (pool != NULL) { + if (rc != 0) { + ds_pool_tgt_ec_eph_query_abort(pool); + pool_fetch_hdls_ult_abort(pool); + } + + daos_lru_ref_release(pool_cache, &pool->sp_entry); + } + + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + "Post handle pool started for "DF_UUIDF" after DAOS check: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); + + return rc; +} + /* * Start a pool. Must be called on the system xstream. Hold the ds_pool object * till ds_pool_stop. Only for mgmt and pool modules. 
@@ -1142,20 +1268,23 @@ ds_pool_start(uuid_t uuid) pool = pool_obj(llink); - rc = dss_ult_create(pool_fetch_hdls_ult, pool, DSS_XS_SYS, - 0, DSS_DEEP_STACK_SZ, NULL); - if (rc != 0) { - D_ERROR(DF_UUID": failed to create fetch ult: %d\n", - DP_UUID(uuid), rc); - D_GOTO(failure_pool, rc); - } + if (!engine_in_check()) { + rc = dss_ult_create(pool_fetch_hdls_ult, pool, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ, NULL); + if (rc != 0) { + D_ERROR(DF_UUID": failed to create fetch ult: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); + D_GOTO(failure_pool, rc); + } - pool->sp_fetch_hdls = 1; - rc = ds_pool_start_ec_eph_query_ult(pool); - if (rc != 0) { - D_ERROR(DF_UUID": failed to start ec eph query ult: %d\n", - DP_UUID(uuid), rc); - D_GOTO(failure_ult, rc); + pool->sp_fetch_hdls = 1; + + rc = ds_pool_start_ec_eph_query_ult(pool); + if (rc != 0) { + D_ERROR(DF_UUID": failed to start ec eph query ult: "DF_RC"\n", + DP_UUID(uuid), DP_RC(rc)); + D_GOTO(failure_ult, rc); + } } ds_iv_ns_start(pool->sp_iv_ns); @@ -1897,6 +2026,9 @@ update_vos_prop_on_targets(void *in) if (child == NULL) return -DER_NONEXIST; /* no child created yet? */ + if (unlikely(child->spc_no_storage)) + D_GOTO(out, ret = 0); + ret = vos_pool_ctl(child->spc_hdl, VOS_PO_CTL_SET_DATA_THRESH, &pool->sp_data_thresh); if (ret) goto out; diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index 52e3496f732..211fd5f54e9 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -97,7 +97,7 @@ int ds_pool_bcast_create(crt_context_t ctx, struct ds_pool *pool, enum daos_module_id module, crt_opcode_t opcode, uint32_t version, crt_rpc_t **rpc, crt_bulk_t bulk_hdl, - d_rank_list_t *excluded_list) + d_rank_list_t *excluded_list, void *priv) { d_rank_list_t excluded; crt_opcode_t opc; @@ -124,7 +124,7 @@ ds_pool_bcast_create(crt_context_t ctx, struct ds_pool *pool, opc = DAOS_RPC_OPCODE(opcode, module, version); rc = crt_corpc_req_create(ctx, pool->sp_group, excluded.rl_nr == 0 ? 
NULL : &excluded, - opc, bulk_hdl/* co_bulk_hdl */, NULL /* priv */, + opc, bulk_hdl/* co_bulk_hdl */, priv, 0 /* flags */, crt_tree_topo(CRT_TREE_KNOMIAL, 32), rpc); diff --git a/src/proto/Makefile b/src/proto/Makefile index 69dc42f4790..0c71dd0c923 100644 --- a/src/proto/Makefile +++ b/src/proto/Makefile @@ -15,6 +15,8 @@ C_HEADER_FILES = include/daos/drpc.pb-c.h\ bio/smd.pb-c.h\ mgmt/server.pb-c.h\ mgmt/svc.pb-c.h\ + mgmt/check.pb-c.h\ + chk/chk.pb-c.h\ security/auth.pb-c.h\ tests/drpc/drpc_test.pb-c.h C_SOURCE_FILES = common/drpc.pb-c.c\ @@ -27,12 +29,15 @@ C_SOURCE_FILES = common/drpc.pb-c.c\ bio/smd.pb-c.c\ mgmt/server.pb-c.c\ mgmt/svc.pb-c.c\ + mgmt/check.pb-c.c\ + chk/chk.pb-c.c\ security/auth.pb-c.c\ tests/drpc/drpc_test.pb-c.c GO_CONTROL_FILES = common/proto/shared/ranks.pb.go\ common/proto/shared/event.pb.go\ common/proto/mgmt/acl.pb.go\ common/proto/mgmt/cont.pb.go\ + common/proto/mgmt/check.pb.go\ common/proto/mgmt/mgmt.pb.go\ common/proto/mgmt/pool.pb.go\ common/proto/mgmt/svc.pb.go\ @@ -48,6 +53,8 @@ GO_CONTROL_FILES = common/proto/shared/ranks.pb.go\ common/proto/ctl/support.pb.go\ common/proto/ctl/firmware.pb.go\ common/proto/ctl/ranks.pb.go\ + common/proto/chk/chk.pb.go\ + common/proto/chk/faults.pb.go\ common/proto/srv/srv.pb.go\ drpc/drpc.pb.go\ security/auth/auth.pb.go\ @@ -151,10 +158,10 @@ C_TARGETS = $(addprefix $(DAOS_ROOT)/src/,$(C_HEADER_FILES)) \ proto-c: $(PROTOC_GEN_C) $(C_TARGETS) $(DAOS_ROOT)/src/%.pb-c.h: $(PROTO_SOURCE_DIR)/%.proto - protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) + protoc -I $(dir $<) -I $(PROTO_SOURCE_DIR) --c_out=$(dir $@) $(notdir $<) $(DAOS_ROOT)/src/%.pb-c.c: $(PROTO_SOURCE_DIR)/%.proto - protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) + protoc -I $(dir $<) -I $(PROTO_SOURCE_DIR) --c_out=$(dir $@) $(notdir $<) $(DAOS_ROOT)/src/bio/smd.pb-c.h: $(PROTO_SOURCE_DIR)/ctl/smd.proto protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) @@ -169,10 +176,10 @@ $(DAOS_ROOT)/src/engine/event.pb-c.c: 
$(PROTO_SOURCE_DIR)/shared/event.proto protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) $(DAOS_ROOT)/src/engine/%.pb-c.h: $(PROTO_SOURCE_DIR)/srv/%.proto - protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) + protoc -I $(dir $<) -I $(PROTO_SOURCE_DIR) --c_out=$(dir $@) $(notdir $<) $(DAOS_ROOT)/src/engine/%.pb-c.c: $(PROTO_SOURCE_DIR)/srv/%.proto - protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) + protoc -I $(dir $<) -I $(PROTO_SOURCE_DIR) --c_out=$(dir $@) $(notdir $<) $(DAOS_ROOT)/src/mgmt/smd.pb-c.h: $(PROTO_SOURCE_DIR)/ctl/smd.proto protoc -I $(dir $<) --c_out=$(dir $@) $(notdir $<) diff --git a/src/proto/chk/chk.proto b/src/proto/chk/chk.proto new file mode 100644 index 00000000000..19171a9a910 --- /dev/null +++ b/src/proto/chk/chk.proto @@ -0,0 +1,224 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +syntax = "proto3"; +package chk; + +option go_package = "github.com/daos-stack/daos/src/control/common/proto/chk"; + +// The enumeration values shared by DAOS check engine and control plane. + +// Kinds of DAOS global inconsistency. +enum CheckInconsistClass { + // Consistent cases. + CIC_NONE = 0; + // Only a subset of the pool services are present but we will have a quorum. + // Default action: CIA_IGNORE. + CIC_POOL_LESS_SVC_WITH_QUORUM = 1; + // Only a subset of the pool services are present, and we don't have a quorum. + // Default action: CIA_INTERACT. + CIC_POOL_LESS_SVC_WITHOUT_QUORUM = 2; + // More members are reported than the pool service was created with. + // Default action: CIA_DISCARD. Remove unrecognized pool service. + CIC_POOL_MORE_SVC = 3; + // Engine(s) claim the pool which is not registered to MS. + // Default action: CIA_READD. Register the pool to the MS. + CIC_POOL_NONEXIST_ON_MS = 4; + // Pool is registered to MS but not claimed by any engine. + // Default action: CIA_DISCARD. De-register pool from MS.
+ CIC_POOL_NONEXIST_ON_ENGINE = 5; + // Svcl list stored in MS does not match the actual PS membership. + // Default action: CIA_TRUST_PS. Refresh svcl list in MS DB. + CIC_POOL_BAD_SVCL = 6; + // The pool label recorded by MS does not match the pool label property from PS. + // Default action: CIA_TRUST_PS. Refresh label in MS DB. + CIC_POOL_BAD_LABEL = 7; + // An engine has some allocated storage but does not appear in pool map. + // Default action: CIA_DISCARD. Associated files and blobs will be deleted from the engine. + CIC_ENGINE_NONEXIST_IN_MAP = 8; + // An engine has some allocated storage and is marked as down/downout in pool map. + // Default action: CIA_IGNORE. It can be reintegrated after CR scan. + CIC_ENGINE_DOWN_IN_MAP = 9; + // An engine is referenced in pool map, but no storage is actually allocated on this engine. + // Default action: CIA_DISCARD. Evict the rank from pool map, give left things to rebuild. + CIC_ENGINE_HAS_NO_STORAGE = 10; + // Containers that have storage allocated on engine but do not exist in the PS. + // Default action: CIA_DISCARD. Destroy the unrecognized container. + CIC_CONT_NONEXIST_ON_PS = 11; + // The container label recorded by PS does not match the container label property. + // Default action: CIA_TRUST_PS. Refresh label property on related target(s). + CIC_CONT_BAD_LABEL = 12; + + // More for subsequent CR phases. + + // The DTX is corrupted, some participant RDG(s) may be lost. + // Default action: CIA_INTERACT. + CIC_DTX_CORRUPTED = 13; + // The DTX entry on leader does not exist, then not sure the status. + // Default action: CIA_DISCARD. It is equal to aborting the DTX and may lose data on related + // shard, then we may find data inconsistency in subsequent CR scan phase, at that time, + // such data inconsistency will be fixed. + CIC_DTX_ORPHAN = 14; + // The checksum information is lost. + // Default action: CIA_READD. We have to trust the data and recalculate the checksum.
If + // data is corrupted, then we may hit data inconsistency in subsequent CR scan phase, at + // that time, such data inconsistency will be fixed. + CIC_CSUM_LOST = 15; + // Checksum related inconsistency or data corruption. + // Default action: CIA_DISCARD. Then we will hit data loss in subsequent CR scan phase, + // at that time, such data inconsistency will be fixed. + CIC_CSUM_FAILURE = 16; + // Replicated object lost some replica(s). + // Default action: CIA_READD. Copy from another valid replica. + CIC_OBJ_LOST_REP = 17; + // EC object lost parity or data shard(s). + // Default action: CIA_READD. Trust other available shards and recalculate the lost one(s). + CIC_OBJ_LOST_EC_SHARD = 18; + // EC object lost too many shards that exceeds its redundancy. + // Default action: CIA_INTERACT. Ask the admin to decide whether keep or remove the object. + CIC_OBJ_LOST_EC_DATA = 19; + // Data inconsistency among replicas + // Default action: CIA_TRUST_LATEST. Try to keep the latest data. If all have the same epoch, + // then ask the admin (CIA_INTERACT) to decide which one will be trusted. + CIC_OBJ_DATA_INCONSIST = 20; + + // Unknown inconsistency. + // Default action: CIA_IGNORE. + CIC_UNKNOWN = 100; +} + +// Actions for how to handle kinds of inconsistency. +enum CheckInconsistAction { + // Default action, depends on the detailed inconsistency class. + CIA_DEFAULT = 0; + // Interact with administrator for further action. + CIA_INTERACT = 1; + // Ignore but log the inconsistency. + CIA_IGNORE = 2; + // Discard the unrecognized element: pool service, pool itself, container, and so on. + CIA_DISCARD = 3; + // Re-add the missing element: pool to MS, target to pool map, and so on. + CIA_READD = 4; + // Trust the information recorded in MS DB. + CIA_TRUST_MS = 5; + // Trust the information recorded in PS DB. + CIA_TRUST_PS = 6; + // Trust the information recorded by target(s). + CIA_TRUST_TARGET = 7; + // Trust the majority parts (if have).
+ CIA_TRUST_MAJORITY = 8; + // Trust the one with latest (pool map or epoch) information. Keep the latest data. + CIA_TRUST_LATEST = 9; + // Trust the one with oldest (pool map or epoch) information. Rollback to old version. + CIA_TRUST_OLDEST = 10; + // Trust EC parity shard. + CIA_TRUST_EC_PARITY = 11; + // Trust EC data shard. + CIA_TRUST_EC_DATA = 12; +} + +// The flags to control DAOS check general behavior, not related with any detailed inconsistency. +enum CheckFlag { + CF_NONE = 0; + // Only scan without real repairing inconsistency. + CF_DRYRUN = 1; + // Start DAOS check from the beginning. + // Otherwise, resume the DAOS check from the latest checkpoint by default. + CF_RESET = 2; + // Stop DAOS check if hit unknown inconsistency or fail to repair some inconsistency. + // Otherwise, mark 'fail' on related component and continue to handle next one by default. + CF_FAILOUT = 4; + // If the admin does not want to interact with engine during check scan, then CIA_INTERACT + // will be converted to CIA_IGNORE. That will overwrite the CheckInconsistPolicy. + CF_AUTO = 8; + // Handle orphan pool when start the check instance. If not specify the flag, some orphan + // pool(s) may be not handled (by default) unless all pools are checked from the scratch. + CF_ORPHAN_POOL = 16; + // Overwrite former set CF_FAILOUT flag, cannot be specified together with CF_FAILOUT. + CF_NO_FAILOUT = 32; + // Overwrite former set CF_AUTO flag, cannot be specified together with CF_AUTO. + CF_NO_AUTO = 64; + + // More flags with 2^n. +} + +// The status of DAOS check instance. +enum CheckInstStatus { + CIS_INIT = 0; // DAOS check has never been run. + CIS_RUNNING = 1; // DAOS check is still in process. + CIS_COMPLETED = 2; // All passes have been done for all required pools. + CIS_STOPPED = 3; // DAOS check has been explicitly stopped, do not allow to rejoin. + CIS_FAILED = 4; // DAOS check auto stopped for some unrecoverable failure, do not rejoin. 
+ CIS_PAUSED = 5; // DAOS check has been paused because engine exit, allow to rejoin. + CIS_IMPLICATED = 6; // Check on the engine exit for other engine failure, do not rejoin. +} + +// The pool status for DAOS check. +enum CheckPoolStatus { + CPS_UNCHECKED = 0; // DAOS check has not started against this pool. + CPS_CHECKING = 1; // The pool is being checked. + CPS_CHECKED = 2; // DAOS check has successfully completed all the passes on this pool. + CPS_FAILED = 3; // DAOS check could not be completed due to some unrecoverable failure. + CPS_PAUSED = 4; // Checking the pool has been paused because engine exit. + CPS_PENDING = 5; // Waiting for the decision from the admin. + CPS_STOPPED = 6; // DAOS check on the pool has been stopped explicitly. + CPS_IMPLICATED = 7; // Check on the pool is stopped because of other pool or engine failure. +} + +// DAOS check engine scan phases. +enum CheckScanPhase { + CSP_PREPARE = 0; // Initial phase, prepare to start check on related engines. + CSP_POOL_LIST = 1; // Pool list consolidation. + + // The following phases are per-pool based. The scanning different pools can be + // handled in parallel, so different pools maybe in different check scan phases. + + CSP_POOL_MBS = 2; // Pool membership. + CSP_POOL_CLEANUP = 3; // Pool cleanup. + CSP_CONT_LIST = 4; // Container list consolidation. + CSP_CONT_CLEANUP = 5; // Container cleanup. + + // The following phases will be implemented in the future. + + CSP_DTX_RESYNC = 6; // DTX resync and cleanup. + CSP_OBJ_SCRUB = 7; // RP/EC shards consistency verification with checksum scrub if have. + CSP_REBUILD = 8; // Object rebuild. + CSP_AGGREGATION = 9; // EC aggregation & VOS aggregation. + + CSP_DONE = 10; // All done. +} + +// DAOS check engine reports the found inconsistency and repair result to control plane. 
+// If the repair action is CIA_INTERACT, then the control plane will reply current dRPC +// firstly, and then interact with the admin for the repair decision in another section +// and tell DAOS check engine via another DRPC_METHOD_MGMT_CHK_ACT dRPC call. +// +// If the CheckReport::msg is not enough to help admin to make the decision, then we +// may have to leverage DAOS debug tools to dump more information from related target. +message CheckReport { + uint64 seq = 1; // DAOS Check event sequence, unique for the instance. + CheckInconsistClass class = 2; // Inconsistency class + CheckInconsistAction action = 3; // The action taken to repair the inconsistency + // Repair result: zero is for repaired successfully. + // negative value if failed to repair. + // positive value is for CIA_IGNORE or dryrun mode. + // It is meaningless if the action is CIA_INTERACT. + int32 result = 4; + uint32 rank = 5; // Inconsistency happened on which rank if applicable. + uint32 target = 6; // Inconsistency happened on which target in the rank if applicable. + string pool_uuid = 7; // The consistency is in which pool if applicable. + string pool_label = 8; // The pool label, if available. + string cont_uuid = 9; // The consistency is in which container if applicable. + string cont_label = 10; // The container label, if available. + string objid = 11; // The consistency is in which object if applicable. + string dkey = 12; // The consistency is in which dkey if applicable. + string akey = 13; // The consistency is in which akey if applicable. + string timestamp = 14; // The time of report (and repair) the inconsistency. + string msg = 15; // Information to describe the inconsistency in detail. + repeated CheckInconsistAction act_choices = 16; // Interactive mode options (first is suggested). + repeated string act_details = 17; // Details for each potential action (length should match actions). 
+ repeated string act_msgs = 18; // Formatted messages containing details for each action choice. +} diff --git a/src/proto/chk/faults.proto b/src/proto/chk/faults.proto new file mode 100644 index 00000000000..8bfb4d58c6f --- /dev/null +++ b/src/proto/chk/faults.proto @@ -0,0 +1,19 @@ +// +// (C) Copyright 2022 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +syntax = "proto3"; +package chk; + +option go_package = "github.com/daos-stack/daos/src/control/common/proto/chk"; + +import "chk/chk.proto"; + +message Fault { + CheckInconsistClass class = 1; + repeated string strings = 2; + repeated uint32 uints = 3; + repeated int32 ints = 4; +} diff --git a/src/proto/ctl/ranks.proto b/src/proto/ctl/ranks.proto index f983974c8a3..ca2a98b1fe7 100644 --- a/src/proto/ctl/ranks.proto +++ b/src/proto/ctl/ranks.proto @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -16,6 +16,7 @@ import "shared/ranks.proto"; message RanksReq { bool force = 3; // force operation string ranks = 4; // rankset to operate over + bool check_mode = 5; // start in check mode } // Generic response containing DER result from multiple ranks. diff --git a/src/proto/mgmt/check.proto b/src/proto/mgmt/check.proto new file mode 100644 index 00000000000..3fbfca9a323 --- /dev/null +++ b/src/proto/mgmt/check.proto @@ -0,0 +1,197 @@ +// +// (C) Copyright 2022-2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +syntax = "proto3"; +package mgmt; + +option go_package = "github.com/daos-stack/daos/src/control/common/proto/mgmt"; + +import "chk/chk.proto"; + +// Check Protobuf Definitions related to interactions between +// DAOS control server and DAOS check via dRPC. + +// The pairs for kinds of inconsistency and related repair action. 
The control plane needs to + // generate such policy array from some configuration file either via command line option or + // some default location, such as /etc/daos/daos_check.yml. Such policy array will be passed + // to DAOS engine when start check and cannot be changed during check scanning, but can be listed + // via 'dmg check prop' - see CheckPropResp. +message CheckInconsistPolicy { + chk.CheckInconsistClass inconsist_cas = 1; // See CheckInconsistClass. + chk.CheckInconsistAction inconsist_act = 2; // See CheckInconsistAction. +} + +message CheckEnableReq { + string sys = 1; +} + +message CheckDisableReq { + string sys = 1; +} + +// For 'dmg check start'. +message CheckStartReq { + string sys = 1; // DAOS system identifier. + uint32 flags = 2; // See CheckFlag. + // The list of ranks to start DAOS check. Cannot be empty. + // + // The control plane will generate the ranks list and guarantee that any rank in the system + // either will participate in check or has been excluded. Otherwise, partial ranks check + // may cause some unexpected and unrecoverable result unless the specified pool(s) does not + // exist on those missed rank(s). + repeated uint32 ranks = 3; + // UUID for the pools for which to start DAOS check. + // If empty, then start DAOS check for all pools in the system. + repeated string uuids = 4; + repeated CheckInconsistPolicy policies = 5; // Policy array for handling inconsistency. +} + +// CheckStartResp returns the result of check start. +message CheckStartResp { + int32 status = 1; // DAOS error code. +} + +// For 'dmg check stop'. +message CheckStopReq { + string sys = 1; // DAOS system identifier. + // UUID for the pools for which to stop DAOS check. + // If empty, then stop check for all pools in the system. + repeated string uuids = 2; +} + +// CheckStopResp returns the result of check stop. +message CheckStopResp { + int32 status = 1; // DAOS error code. +} + + +// For 'dmg check query'.
+message CheckQueryReq { + string sys = 1; // DAOS system identifier. + // UUID for the pools for which to query DAOS check. + // If empty, then query DAOS check for all pools in the system. + repeated string uuids = 2; + bool shallow = 3; // shallow query (findings only) + repeated uint64 seqs = 4; // return findings with these sequences (implies shallow) +} + +// Time information on related component: system, pool or target. +message CheckQueryTime { + uint64 start_time = 1; // The time of check instance being started on the component. + // If the check instance is still running on the component, then it is the estimated + // remaining time to complete the check on the component. Otherwise, it is the time + // of the check instance completed, failed or stopped on the component. + uint64 misc_time = 2; +} + +// Inconsistency statistics on related component: system, pool or target. +message CheckQueryInconsist { + uint32 total = 1; // The count of total found inconsistency on the component. + uint32 repaired = 2; // The count of repaired inconsistency on the component. + uint32 ignored = 3; // The count of ignored inconsistency on the component. + uint32 failed = 4; // The count of fail to repaired inconsistency on the component. +} + +// Check query result for the pool shard on the target. +message CheckQueryTarget { + uint32 rank = 1; // Rank ID. + uint32 target = 2; // Target index in the rank. + chk.CheckInstStatus status = 3; // Check instance status on this target - see CheckInstStatus. + // Inconsistency statistics during the phases range + // [CSP_DTX_RESYNC, CSP_AGGREGATION] for the pool shard on the target. + CheckQueryInconsist inconsistency = 4; + // Time information for the pool shard on the target if applicable. + CheckQueryTime time = 5; +} + +// Check query result for the pool. +message CheckQueryPool { + string uuid = 1; // Pool UUID. + chk.CheckPoolStatus status = 2; // Pool status - see CheckPoolStatus. 
+ chk.CheckScanPhase phase = 3; // Scan phase - see CheckScanPhase. + // Inconsistency statistics during the phases range + // [CSP_POOL_MBS, CSP_CONT_CLEANUP] for the pool. + CheckQueryInconsist inconsistency = 4; + // Time information for the pool if applicable. + CheckQueryTime time = 5; + // Per target based query result for the phases since CSP_DTX_RESYNC. + repeated CheckQueryTarget targets = 6; +} + +// CheckQueryResp returns DAOS check status for required pool(s) or the whole system. +// Depend on the dmg command line option, the control plane needs to reorganize the query +// results with summary (of pool shards from targets) and different detailed information. +message CheckQueryResp { + int32 req_status = 1; // DAOS error code. + // The whole check instance status depends on the each engine status: + // As long as one target is in CIS_RUNNING, then the instance is CIS_RUNNING. + // Otherwise, in turn with the status of CIS_FAILED, CIS_STOPPED, CIS_IMPLICATED, + // CIS_COMPLETED, CIS_PAUSED, CIS_INIT. + chk.CheckInstStatus ins_status = 2; + // Scan phase - see CheckScanPhase. Before moving to CSP_POOL_MBS, the check + // instance status is maintained on the check leader. And then multiple pools + // can be processed in parallel, so the instance phase for different pools may + // be different, see CheckQueryPool::phase. + chk.CheckScanPhase ins_phase = 3; + // Inconsistency statistics during the phases range + // [CSP_PREPARE, CSP_POOL_LIST] for the whole system. + CheckQueryInconsist inconsistency = 4; + // Time information for the whole system if applicable. + CheckQueryTime time = 5; + // Per pool based query result for the phases since CSP_POOL_MBS. + repeated CheckQueryPool pools = 6; + // Inconsistency reports to be displayed + repeated chk.CheckReport reports = 7; +} + +// For 'dmg check set-policy' +message CheckSetPolicyReq { + string sys = 1; // DAOS system identifier. + uint32 flags = 2; // The flags when start check - see CheckFlag. 
+ repeated CheckInconsistPolicy policies = 3; // Inconsistency policy array. +} + +// To allow daos_server to query check leader properties +message CheckPropReq { + string sys = 1; // DAOS system identifier. +} + +// CheckPropResp returns the result of check prop and the properties when start check. +message CheckPropResp { + int32 status = 1; // DAOS error code. + uint32 flags = 2; // The flags when start check - see CheckFlag. + repeated CheckInconsistPolicy policies = 3; // Inconsistency policy array. +} + +// For 'dmg check get-policy' +message CheckGetPolicyReq { + string sys = 1; // DAOS system identifier. + repeated chk.CheckInconsistClass classes = 2; + bool last_used = 3; +} + +// CheckGetPolicyResp returns the result of check prop and the properties when start check. +// NB: Dupe of CheckPropResp currently; may consolidate if they don't diverge. +message CheckGetPolicyResp { + int32 status = 1; // DAOS error code. + uint32 flags = 2; // The flags when start check - see CheckFlag. + repeated CheckInconsistPolicy policies = 3; // Inconsistency policy array. +} + +// For the admin's decision from DAOS check interaction. +message CheckActReq { + string sys = 1; // DAOS system identifier. + // DAOS RAS event sequence - see RASEvent::extended_info::check_info::chk_inconsist_seq. + uint64 seq = 2; + // The decision from RASEvent::extended_info::check_info::chk_opts. + chk.CheckInconsistAction act = 3; + bool for_all = 4; // The same action is applicable to the same type of inconsistency. +} + +// CheckActResp returns the result of executing admin's decision. +message CheckActResp { + int32 status = 1; // DAOS error code. 
+} diff --git a/src/proto/mgmt/mgmt.proto b/src/proto/mgmt/mgmt.proto index 73f93b1b663..d58c8115fa8 100644 --- a/src/proto/mgmt/mgmt.proto +++ b/src/proto/mgmt/mgmt.proto @@ -11,10 +11,13 @@ option go_package = "github.com/daos-stack/daos/src/control/common/proto/mgmt"; import "shared/event.proto"; import "mgmt/pool.proto"; +import "mgmt/check.proto"; import "mgmt/cont.proto"; import "mgmt/svc.proto"; import "mgmt/acl.proto"; // ACL-related requests import "mgmt/system.proto"; +import "chk/chk.proto"; +import "chk/faults.proto"; // Management Service is replicated on a small number of servers in the system, // these requests will be processed on a host that is a member of the management @@ -80,6 +83,22 @@ service MgmtSvc { rpc SystemErase(SystemEraseReq) returns(SystemEraseResp) {} // Clean up leaked resources for a given node rpc SystemCleanup(SystemCleanupReq) returns(SystemCleanupResp){} + // Enable system check mode + rpc SystemCheckEnable(CheckEnableReq) returns(DaosResp){} + // Disable system check mode + rpc SystemCheckDisable(CheckDisableReq) returns(DaosResp){} + // Initiate a system check + rpc SystemCheckStart(CheckStartReq) returns(CheckStartResp){} + // Stop a system check + rpc SystemCheckStop(CheckStopReq) returns(CheckStopResp){} + // Query a system check + rpc SystemCheckQuery(CheckQueryReq) returns(CheckQueryResp){} + // Set system check properties + rpc SystemCheckSetPolicy(CheckSetPolicyReq) returns(DaosResp){} + // Query system check properties + rpc SystemCheckGetPolicy(CheckGetPolicyReq) returns(CheckGetPolicyResp){} + // Send the desired action to repair an inconsistency. + rpc SystemCheckRepair(CheckActReq) returns(CheckActResp){} // PoolUpgrade queries a DAOS pool. rpc PoolUpgrade(PoolUpgradeReq) returns (PoolUpgradeResp) {} // Set a system attribute or attributes. @@ -90,4 +109,12 @@ service MgmtSvc { rpc SystemSetProp(SystemSetPropReq) returns (DaosResp) {} // Get a system property or properties. 
rpc SystemGetProp(SystemGetPropReq) returns (SystemGetPropResp) {} + + + // Fault injection handlers are only implemented in non-release builds. + // FaultInjectReport injects a checker report. + rpc FaultInjectReport(chk.CheckReport) returns (DaosResp) {} + // FaultInjectPoolFault creates a pool fault for testing the checker. + rpc FaultInjectPoolFault(chk.Fault) returns (DaosResp) {} + rpc FaultInjectMgmtPoolFault(chk.Fault) returns (DaosResp) {} } diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 6c6833a6b4e..8796a7f0183 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -43,6 +43,7 @@ message JoinReq { uint64 incarnation = 9; // rank incarnation repeated string secondary_uris = 10; // URIs for any secondary providers repeated uint32 secondary_nctxs = 11; // CaRT context count for each secondary provider + bool check_mode = 12; // rank started in check mode } message JoinResp { @@ -51,6 +52,7 @@ message JoinResp { enum State { IN = 0; // Server in the system. OUT = 1; // Server excluded from the system. + CHECK = 2; // Server should start in checker mode. } State state = 3; // Server state in the system map. 
string faultDomain = 4; // Fault domain for the instance diff --git a/src/proto/mgmt/system.proto b/src/proto/mgmt/system.proto index b66f65ce164..80ba956afeb 100644 --- a/src/proto/mgmt/system.proto +++ b/src/proto/mgmt/system.proto @@ -54,6 +54,7 @@ message SystemStartReq { string sys = 1; // DAOS system name string ranks = 2; // rankset to query string hosts = 3; // hostset to query + bool check_mode = 4; // start ranks in check mode } // SystemStartResp returns status of restart attempt and results diff --git a/src/proto/srv/srv.proto b/src/proto/srv/srv.proto index d8ec70e28ae..d2cc63dbab1 100644 --- a/src/proto/srv/srv.proto +++ b/src/proto/srv/srv.proto @@ -11,6 +11,8 @@ package srv; option go_package = "github.com/daos-stack/daos/src/control/common/proto/srv"; +import "chk/chk.proto"; + message NotifyReadyReq { string uri = 1; // Primary CaRT URI uint32 nctxs = 2; // Number of primary CaRT contexts @@ -20,6 +22,7 @@ message NotifyReadyReq { uint64 incarnation = 6; // HLC incarnation number repeated string secondaryUris = 7; // secondary CaRT URIs repeated uint32 secondaryNctxs = 8; // number of CaRT contexts for each secondary provider + bool check_mode = 9; // True if engine started in checker mode } // NotifyReadyResp is nil. @@ -42,3 +45,47 @@ message PoolFindByLabelResp { string uuid = 2; // Pool UUID repeated uint32 svcreps = 3; // Pool service replica ranks } + +// List all the known pools from MS. +message CheckListPoolReq { +} + +message CheckListPoolResp { + message OnePool { + string uuid = 1; // Pool UUID. + string label = 2; // Pool label. + repeated uint32 svcreps = 3; // Pool service replica ranks. + } + int32 status = 1; // DAOS error code. + repeated OnePool pools = 2; // The list of pools. +} + +// Register pool to MS. +message CheckRegPoolReq { + uint64 seq = 1; // DAOS Check event sequence, unique for the instance. + string uuid = 2; // Pool UUID. + string label = 3; // Pool label. 
+ repeated uint32 svcreps = 4; // Pool service replica ranks. +} + +message CheckRegPoolResp { + int32 status = 1; // DAOS error code. +} + +// Deregister pool from MS. +message CheckDeregPoolReq { + uint64 seq = 1; // DAOS Check event sequence, unique for the instance. + string uuid = 2; // The pool to be deregistered. +} + +message CheckDeregPoolResp { + int32 status = 1; // DAOS error code. +} + +message CheckReportReq { + chk.CheckReport report = 1; // Report payload +} + +message CheckReportResp { + int32 status = 1; // DAOS error code. +} diff --git a/src/rdb/rdb.c b/src/rdb/rdb.c index 65e81c33008..8e88d10dfa8 100644 --- a/src/rdb/rdb.c +++ b/src/rdb/rdb.c @@ -242,7 +242,7 @@ rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint6 struct vos_pool_space vps; uint64_t rdb_extra_sys[DAOS_MEDIA_MAX]; - D_ASSERT(cbs->dc_stop != NULL); + D_ASSERT(cbs == NULL || cbs->dc_stop != NULL); D_ALLOC_PTR(db); if (db == NULL) { @@ -498,6 +498,97 @@ rdb_get_use_leases(void) return value; } +/** + * Glance at \a storage and return \a clue. Callers are responsible for freeing + * \a clue->bcl_replicas with d_rank_list_free. 
+ * + * \param[in] storage database storage + * \param[out] clue database clue + */ +int +rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) +{ + struct rdb *db = rdb_from_storage(storage); + d_iov_t value; + uint64_t term; + int vote; + uint64_t last_index = db->d_lc_record.dlr_tail - 1; + uint64_t last_term; + d_rank_list_t *replicas; + uint64_t oid_next; + int rc; + + d_iov_set(&value, &term, sizeof(term)); + rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_term, &value); + if (rc == -DER_NONEXIST) { + term = 0; + } else if (rc != 0) { + D_ERROR(DF_DB": failed to look up term: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + goto err; + } + + d_iov_set(&value, &vote, sizeof(vote)); + rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_vote, &value); + if (rc == -DER_NONEXIST) { + vote = -1; + } else if (rc != 0) { + D_ERROR(DF_DB": failed to look up vote: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + goto err; + } + + if (last_index == db->d_lc_record.dlr_base) { + last_term = db->d_lc_record.dlr_base_term; + } else { + struct rdb_entry header; + + d_iov_set(&value, &header, sizeof(header)); + rc = rdb_lc_lookup(db->d_lc, last_index, RDB_LC_ATTRS, &rdb_lc_entry_header, + &value); + if (rc != 0) { + D_ERROR(DF_DB": failed to look up entry "DF_U64" header: %d\n", DP_DB(db), + last_index, rc); + goto err; + } + last_term = header.dre_term; + } + + rc = rdb_raft_load_replicas(db->d_lc, last_index, &replicas); + if (rc != 0) { + D_ERROR(DF_DB": failed to load replicas at "DF_U64": "DF_RC"\n", DP_DB(db), + last_index, DP_RC(rc)); + goto err; + } + + d_iov_set(&value, &oid_next, sizeof(oid_next)); + rc = rdb_lc_lookup(db->d_lc, last_index, RDB_LC_ATTRS, &rdb_lc_oid_next, &value); + if (rc == -DER_NONEXIST) { + oid_next = RDB_LC_OID_NEXT_INIT; + } else if (rc != 0) { + D_ERROR(DF_DB": failed to look up next object number: %d\n", DP_DB(db), rc); + goto err_replicas; + } + + clue->bcl_term = term; + clue->bcl_vote = vote; + /* + * In the future, the self node ID might differ from 
the rank and need + * to be stored persistently. + */ + clue->bcl_self = dss_self_rank(); + clue->bcl_last_index = last_index; + clue->bcl_last_term = last_term; + clue->bcl_base_index = db->d_lc_record.dlr_base; + clue->bcl_base_term = db->d_lc_record.dlr_base_term; + clue->bcl_replicas = replicas; + clue->bcl_oid_next = oid_next; + return 0; + +err_replicas: + d_rank_list_free(replicas); +err: + return rc; +} + /** * Start \a storage, converting \a storage into \a dbp. If this is successful, * the caller must stop using \a storage; otherwise, the caller remains @@ -574,6 +665,27 @@ rdb_stop_and_close(struct rdb *db) rdb_close(storage); } +/** + * Forcefully removing all other replicas from the membership. Callers must + * destroy all other replicas (or prevent them from starting) beforehand. + * + * This API is for catastrophic recovery scenarios, for instance, when more + * than a minority of replicas are lost. + * + * 1 Choose the best replica to recover from (see ds_pool_check_svc_clues). + * 2 Destroy all other replicas (or prevent them from starting). + * 3 Call rdb_open and rdb_dictate on the chosen replica. + * + * \param[in] storage database storage + */ +int +rdb_dictate(struct rdb_storage *storage) +{ + struct rdb *db = rdb_from_storage(storage); + + return rdb_raft_dictate(db); +} + /** * Add \a replicas. 
* diff --git a/src/rdb/rdb_internal.h b/src/rdb/rdb_internal.h index 9d9da7e6fab..f7e53daf615 100644 --- a/src/rdb/rdb_internal.h +++ b/src/rdb/rdb_internal.h @@ -185,10 +185,12 @@ int rdb_raft_open(struct rdb *db, uint64_t caller_term); int rdb_raft_start(struct rdb *db); void rdb_raft_stop(struct rdb *db); void rdb_raft_close(struct rdb *db); +int rdb_raft_dictate(struct rdb *db); void rdb_raft_resign(struct rdb *db, uint64_t term); int rdb_raft_campaign(struct rdb *db); int rdb_raft_ping(struct rdb *db, uint64_t caller_term); int rdb_raft_verify_leadership(struct rdb *db); +int rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, d_rank_list_t **replicas); int rdb_raft_add_replica(struct rdb *db, d_rank_t rank); int rdb_raft_remove_replica(struct rdb *db, d_rank_t rank); int rdb_raft_append_apply(struct rdb *db, void *entry, size_t size, diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c index 30d164b9319..44450fdcf8e 100644 --- a/src/rdb/rdb_raft.c +++ b/src/rdb/rdb_raft.c @@ -245,7 +245,7 @@ rdb_raft_store_replicas(daos_handle_t lc, uint64_t index, const d_rank_list_t *r 2 /* n */, keys, vals); } -static int +int rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, d_rank_list_t **replicas) { d_iov_t value; @@ -2205,7 +2205,7 @@ rdb_raft_destroy_lc(daos_handle_t pool, daos_handle_t mc, d_iov_t *key, uuid_t uuid, struct rdb_lc_record *record) { struct rdb_lc_record r = {}; - d_iov_t value; + d_iov_t value; int rc; D_ASSERTF(key == &rdb_mc_lc || key == &rdb_mc_slc, "%p\n", key); @@ -2388,6 +2388,11 @@ rdb_raft_load_lc(struct rdb *db) DP_RC(rc)); goto err; } + if (uuid_is_null(db->d_slc_record.dlr_uuid)) { + D_DEBUG(DB_MD, DF_DB": null SLC record\n", DP_DB(db)); + db->d_slc = DAOS_HDL_INVAL; + goto load_snapshot; + } rc = vos_cont_open(db->d_pool, db->d_slc_record.dlr_uuid, &db->d_slc); if (rc == -DER_NONEXIST) { D_DEBUG(DB_MD, DF_DB": dangling SLC record: "DF_UUID"\n", @@ -2529,6 +2534,124 @@ rdb_raft_get_ae_max_size(void) return value; } +/* For the 
rdb_raft_dictate case. */ +static int +rdb_raft_discard_slc(struct rdb *db) +{ + struct rdb_lc_record slc_record; + d_iov_t value; + int rc; + + d_iov_set(&value, &slc_record, sizeof(slc_record)); + rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_slc, &value); + if (rc == -DER_NONEXIST) { + D_DEBUG(DB_MD, DF_DB": no SLC record\n", DP_DB(db)); + return 0; + } else if (rc != 0) { + D_ERROR(DF_DB": failed to look up SLC: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + return rc; + } + if (uuid_is_null(slc_record.dlr_uuid)) { + D_DEBUG(DB_MD, DF_DB": null SLC record\n", DP_DB(db)); + return 0; + } + + return rdb_raft_destroy_lc(db->d_pool, db->d_mc, &rdb_mc_slc, slc_record.dlr_uuid, + NULL /* record */); +} + +int +rdb_raft_dictate(struct rdb *db) +{ + struct rdb_lc_record lc_record = db->d_lc_record; + uint64_t term; + d_rank_list_t replicas; + d_rank_t self = dss_self_rank(); + d_iov_t keys[2]; + d_iov_t value; + uint64_t index = lc_record.dlr_tail; + int rc; + + /* + * If an SLC exists, discard it, since it must be either stale or + * incomplete. See rdb_raft_cb_recv_installsnapshot. + */ + rc = rdb_raft_discard_slc(db); + if (rc != 0) + return rc; + + /* + * Since we don't have an RDB fsck phase yet, do a basic check to avoid + * arithmetic issues. + */ + if (lc_record.dlr_base >= index) { + D_ERROR(DF_DB": LC record corrupted: base "DF_U64" >= tail "DF_U64"\n", DP_DB(db), + lc_record.dlr_base, index); + return -DER_IO; + } + + /* Get the term at the last index. */ + if (index - lc_record.dlr_base - 1 > 0) { + struct rdb_entry header; + + /* The LC has entries. Get from the last entry. */ + d_iov_set(&value, &header, sizeof(header)); + rc = rdb_lc_lookup(db->d_lc, index - 1, RDB_LC_ATTRS, &rdb_lc_entry_header, &value); + if (rc != 0) { + D_ERROR(DF_DB": failed to look up entry "DF_U64" header: "DF_RC"\n", + DP_DB(db), index - 1, DP_RC(rc)); + return rc; + } + term = header.dre_term; + } else { + /* The LC has no entries. Get from the snapshot. 
*/ + term = lc_record.dlr_base_term; + } + + /* + * At a new index, reset the membership to only ourself. We also punch + * the entry header and data just for consistency, for this may be a + * membership change entry that, for instance, adds a node other than + * ourself, which contradicts with the new membership of only ourself. + */ + replicas.rl_ranks = &self; + replicas.rl_nr = 1; + rc = rdb_raft_store_replicas(db->d_lc, index, &replicas); + if (rc != 0) { + D_ERROR(DF_DB": failed to reset membership: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + return rc; + } + keys[0] = rdb_lc_entry_header; + keys[1] = rdb_lc_entry_data; + rc = rdb_lc_punch(db->d_lc, index, RDB_LC_ATTRS, 2 /* n */, keys); + if (rc != 0) { + D_ERROR(DF_DB": failed to punch entry: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + return rc; + } + + /* + * Update the LC base and tail. Note that, if successful, this + * "publishes" all the modifications above and effectively commits all + * entries. + */ + lc_record.dlr_base = index; + lc_record.dlr_base_term = term; + lc_record.dlr_tail = index + 1; + d_iov_set(&value, &lc_record, sizeof(lc_record)); + rc = rdb_mc_update(db->d_mc, RDB_MC_ATTRS, 1 /* n */, &rdb_mc_lc, &value); + if (rc != 0) { + D_ERROR(DF_DB": failed to update LC record: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + return rc; + } + D_INFO(DF_DB": updated LC reocrd: base="DF_U64"->"DF_U64" base_term="DF_U64"->"DF_U64 + " tail="DF_U64"->"DF_U64"\n", DP_DB(db), db->d_lc_record.dlr_base, + lc_record.dlr_base, db->d_lc_record.dlr_base_term, lc_record.dlr_base_term, + db->d_lc_record.dlr_tail, lc_record.dlr_tail); + db->d_lc_record = lc_record; + + return 0; +} + int rdb_raft_open(struct rdb *db, uint64_t caller_term) { diff --git a/src/rdb/tests/rdb_test.c b/src/rdb/tests/rdb_test.c index 2069f464b53..c676956e59d 100644 --- a/src/rdb/tests/rdb_test.c +++ b/src/rdb/tests/rdb_test.c @@ -641,7 +641,7 @@ rdbt_init_handler(crt_rpc_t *rpc) D_WARN("ranks[%u]=%u\n", ri, ranks->rl_ranks[ri]); 
MUST(ds_rsvc_dist_start(DS_RSVC_CLASS_TEST, &test_svc_id, in->tii_uuid, ranks, RDB_NIL_TERM, - true /* create */, true /* bootstrap */, DB_CAP)); + DS_RSVC_CREATE, true /* bootstrap */, DB_CAP)); crt_reply_send(rpc); } @@ -827,6 +827,35 @@ rdbt_start_election_handler(crt_rpc_t *rpc) } +static void +rdbt_dictate_handler(crt_rpc_t *rpc) +{ + struct ds_rsvc *rsvc; + uuid_t db_uuid; + struct rdbt_dictate_in *in = crt_req_get(rpc); + struct rdbt_dictate_out *out = crt_reply_get(rpc); + d_rank_list_t *ranks; + + D_WARN("calling dictate on rank %u\n", dss_self_rank()); + + MUST(ds_rsvc_lookup(DS_RSVC_CLASS_TEST, &test_svc_id, &rsvc)); + uuid_copy(db_uuid, rsvc->s_db_uuid); + ds_rsvc_put(rsvc); + + MUST(d_rank_list_dup(&ranks, in->rti_ranks)); + MUST(d_rank_list_del(ranks, in->rti_rank)); + MUST(ds_rsvc_dist_stop(DS_RSVC_CLASS_TEST, &test_svc_id, ranks, NULL, RDB_NIL_TERM, true)); + + ranks->rl_ranks[0] = in->rti_rank; + ranks->rl_nr = 1; + MUST(ds_rsvc_dist_start(DS_RSVC_CLASS_TEST, &test_svc_id, db_uuid, ranks, RDB_NIL_TERM, + DS_RSVC_DICTATE, false /* bootstrap */, 0 /* size */)); + + d_rank_list_free(ranks); + out->rto_rc = 0; + crt_reply_send(rpc); +} + /* Define for cont_rpcs[] array population below. 
* See RDBT_PROTO_*_RPC_LIST macro definition */ diff --git a/src/rdb/tests/rdbt.c b/src/rdb/tests/rdbt.c index 21c29183a9f..0d76aa376ca 100644 --- a/src/rdb/tests/rdbt.c +++ b/src/rdb/tests/rdbt.c @@ -509,6 +509,27 @@ restore_initial_replicas(crt_group_t *grp, uint32_t nranks, return 0; } + +static int +dictate(crt_group_t *grp, d_rank_t rank, d_rank_t chosen_rank, d_rank_list_t *replicas) +{ + crt_rpc_t *rpc; + struct rdbt_dictate_in *in; + struct rdbt_dictate_out *out; + int rc; + + rpc = create_rpc(RDBT_DICTATE, grp, rank); + in = crt_req_get(rpc); + in->rti_ranks = replicas; + in->rti_rank = chosen_rank; + rc = invoke_rpc(rpc); + D_ASSERTF(rc == 0, "%d\n", rc); + out = crt_reply_get(rpc); + rc = out->rto_rc; + destroy_rpc(rpc); + return rc; +} + /**** init command functions ****/ static int @@ -1145,6 +1166,140 @@ testm_disruptive_membership(crt_group_t *grp, uint32_t nranks, return 0; } +static int +testm_dictate_internal(crt_group_t *grp, uint32_t nranks, uint32_t nreplicas, uint64_t key, + uint64_t val, d_rank_t chosen_rank, d_rank_t exec_rank) +{ + d_rank_list_t *ranks; + d_rank_t ldr_rank; + d_rank_t rank; + struct rsvc_hint h; + int i; + int rc; + + printf("INFO: chosen_rank=%u exec_rank=%u\n", chosen_rank, exec_rank); + + ranks = d_rank_list_alloc(nreplicas); + if (ranks == NULL) + return -DER_NOMEM; + + rc = dictate(grp, exec_rank, chosen_rank, ranks); + d_rank_list_free(ranks); + if (rc) { + fprintf(stderr, "FAIL: failed to dictate: "DF_RC"\n", DP_RC(rc)); + return rc; + } + ldr_rank = chosen_rank; + + printf("INFO: waiting for rank %u\n", ldr_rank); + for (i = 0; i < 20; i++) { + rc = rdbt_ping_rank(grp, ldr_rank, &h); + if (rc != -DER_NOTLEADER) + break; + sleep(1); + } + if (rc != 0) { + fprintf(stderr, "FAIL: no leader after dictating: "DF_RC"\n", DP_RC(rc)); + return rc; + } + + printf("INFO: restoring original replicas\n"); + for (rank = 0; rank < nreplicas; rank++) { + if (rank == ldr_rank) + continue; + rc = rdbt_add_replica_rank(grp, ldr_rank, 
rank, &h); + if (rc) { + fprintf(stderr, "FAIL: add back replica rank %u RPC to leader %u: " + DF_RC", hint:(r=%u, t="DF_U64"\n", rank, ldr_rank, + DP_RC(rc), h.sh_rank, h.sh_term); + return rc; + } + } + + printf("INFO: sleeping 10 s for the restored replicas to catch up\n"); + sleep(10); + + printf("INFO: lookup all replicas\n"); + for (rank = 0; rank < nreplicas; rank++) { + const int NO_UPDATE = 0; + uint64_t val_out = 0; + + rc = rdbt_test_rank(grp, rank, NO_UPDATE, RDBT_MEMBER_NOOP, + key, val, &val_out, &h); + if (rc) { + fprintf(stderr, "FAIL: lookup RDB failed via RPC to leader " + "rank %u: "DF_RC", hint:(r=%u, t="DF_U64"\n", + ldr_rank, DP_RC(rc), h.sh_rank, h.sh_term); + return rc; + } + if (val_out != val) { + fprintf(stderr, "FAIL: lookup val="DF_U64" expect "DF_U64"\n", val_out, + val); + return -1; + } + } + + return 0; +} + +static int +testm_dictate(crt_group_t *grp, uint32_t nranks, uint32_t nreplicas, uint64_t key, uint64_t val) +{ + int rc; + d_rank_t ldr_rank; + uint64_t term; + uint64_t val_out = 0; + const int UPDATE = 1; + struct rsvc_hint h; + + printf("\n==== TEST: RDB update, destroy majority, dictate, and lookup\n"); + + rc = rdbt_find_leader(grp, nranks, nreplicas, &ldr_rank, &term); + if (rc) { + fprintf(stderr, "ERR: RDB find leader failed\n"); + return rc; + } + printf("INFO: RDB discovered leader rank %u, term="DF_U64"\n", + ldr_rank, term); + + rc = rdbt_test_rank(grp, ldr_rank, UPDATE, RDBT_MEMBER_NOOP, key, val, + &val_out, &h); + if (rc) { + fprintf(stderr, "FAIL: update RDB failed via RPC to leader " + "rank %u: "DF_RC", hint:(r=%u, t="DF_U64"\n", + ldr_rank, DP_RC(rc), h.sh_rank, h.sh_term); + return rc; + } + if (val_out != val) { + fprintf(stderr, "FAIL: update val="DF_U64" expect "DF_U64"\n", val_out, val); + return -1; + } + + rc = testm_dictate_internal(grp, nranks, nreplicas, key, val, + ldr_rank /* chosen_rank */, + (ldr_rank + 1) % nreplicas /* exec_rank */); + if (rc) + return rc; + + rc = rdbt_find_leader(grp, 
nranks, nreplicas, &ldr_rank, &term); + if (rc) { + fprintf(stderr, "ERR: RDB find leader failed\n"); + return rc; + } + printf("INFO: RDB discovered leader rank %u, term="DF_U64"\n", + ldr_rank, term); + + rc = testm_dictate_internal(grp, nranks, nreplicas, key, val, + (ldr_rank + 1) % nreplicas /* chosen_rank */, + ldr_rank /* exec_rank */); + if (rc) + return rc; + + printf("====== PASS: dictate\n"); + + return 0; +} + static int rdbt_test_multi(crt_group_t *grp, uint32_t nranks, uint32_t nreplicas) { @@ -1205,6 +1360,12 @@ rdbt_test_multi(crt_group_t *grp, uint32_t nranks, uint32_t nreplicas) RDBT_MEMBER_RESIGN); if (rc != 0) return rc; + + val *= 2; + rc = testm_dictate(grp, nranks, nreplicas, key, val); + if (rc != 0) + return rc; + return 0; } diff --git a/src/rdb/tests/rpc.c b/src/rdb/tests/rpc.c index 50acf31a59f..da6a8ebe375 100644 --- a/src/rdb/tests/rpc.c +++ b/src/rdb/tests/rpc.c @@ -42,6 +42,7 @@ CRT_RPC_DEFINE(rdbt_create, DAOS_ISEQ_RDBT_CREATE_OP, DAOS_OSEQ_RDBT_CREATE_OP) CRT_RPC_DEFINE(rdbt_destroy, DAOS_ISEQ_RDBT_DESTROY_OP, DAOS_OSEQ_RDBT_DESTROY_OP) CRT_RPC_DEFINE(rdbt_test, DAOS_ISEQ_RDBT_TEST_OP, DAOS_OSEQ_RDBT_TEST_OP) +CRT_RPC_DEFINE(rdbt_dictate, DAOS_ISEQ_RDBT_DICTATE, DAOS_OSEQ_RDBT_DICTATE) /* Define for cont_rpcs[] array population below. * See RDBT_PROTO_*_RPC_LIST macro definition diff --git a/src/rdb/tests/rpc.h b/src/rdb/tests/rpc.h index 2189d9ce08c..27a406ee3c2 100644 --- a/src/rdb/tests/rpc.h +++ b/src/rdb/tests/rpc.h @@ -15,7 +15,7 @@ * These are for daos_rpc::dr_opc and DAOS_RPC_OPCODE(opc, ...) rather than * crt_req_create(..., opc, ...). See src/include/daos/rpc.h. 
*/ -#define DAOS_RDBT_VERSION 2 +#define DAOS_RDBT_VERSION 3 /* LIST of internal RPCS in form of: * OPCODE, flags, FMT, handler, corpc_hdlr, */ @@ -46,7 +46,10 @@ rdbt_replicas_remove_handler, NULL), \ X(RDBT_START_ELECTION, \ 0, &CQF_rdbt_start_election, \ - rdbt_start_election_handler, NULL) + rdbt_start_election_handler, NULL), \ + X(RDBT_DICTATE, \ + 0, &CQF_rdbt_dictate, \ + rdbt_dictate_handler, NULL) /* Define for RPC enum population below */ #define X(a, b, c, d, e) a @@ -167,4 +170,14 @@ CRT_RPC_DECLARE(rdbt_replicas_stop, DAOS_ISEQ_RDBT_STARTSTOP, CRT_RPC_DECLARE(rdbt_start_election, DAOS_ISEQ_RDBT_START_ELECTION, DAOS_OSEQ_RDBT_START_ELECTION) +#define DAOS_ISEQ_RDBT_DICTATE /* input fields */ \ + ((d_rank_list_t) (rti_ranks) CRT_PTR)\ + ((int32_t) (rti_rank) CRT_VAR)\ + +#define DAOS_OSEQ_RDBT_DICTATE /* output fields */ \ + ((int32_t) (rto_rc) CRT_VAR) + +CRT_RPC_DECLARE(rdbt_dictate, DAOS_ISEQ_RDBT_DICTATE, + DAOS_OSEQ_RDBT_DICTATE) + #endif /* RDB_TESTS_RPC_H */ diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 22d90eb025e..bd32d1d359b 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -339,9 +339,9 @@ rebuild_scan_done(void *data) tls = rebuild_pool_tls_lookup(rpt->rt_pool_uuid, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen); - D_ASSERT(tls != NULL); + if (tls != NULL) + tls->rebuild_pool_scanning = 0; - tls->rebuild_pool_scanning = 0; return 0; } @@ -962,7 +962,8 @@ rebuild_scanner(void *data) tls = rebuild_pool_tls_lookup(rpt->rt_pool_uuid, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen); - D_ASSERT(tls != NULL); + if (tls == NULL) + return 0; if (!is_rebuild_scanning_tgt(rpt)) { D_DEBUG(DB_REBUILD, DF_UUID" skip scan\n", DP_UUID(rpt->rt_pool_uuid)); diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index d7c39bf499c..a8ee692da0e 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -385,8 +385,8 @@ dss_rebuild_check_one(void *data) pool_tls = rebuild_pool_tls_lookup(rpt->rt_pool_uuid, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen); - 
D_ASSERTF(pool_tls != NULL, DF_UUID" ver %d\n", - DP_UUID(rpt->rt_pool_uuid), rpt->rt_rebuild_ver); + if (pool_tls == NULL) + return 0; D_DEBUG(DB_REBUILD, "%d scanning %d status: "DF_RC"\n", idx, pool_tls->rebuild_pool_scanning, @@ -980,7 +980,7 @@ rebuild_scan_broadcast(struct ds_pool *pool, struct rebuild_global_pool_tracker rc = ds_pool_bcast_create(dss_get_module_info()->dmi_ctx, pool, DAOS_REBUILD_MODULE, REBUILD_OBJECTS_SCAN, DAOS_REBUILD_VERSION, - &rpc, NULL, excluded); + &rpc, NULL, excluded, NULL); if (rc != 0) { D_ERROR("pool map broad cast failed: rc "DF_RC"\n", DP_RC(rc)); D_GOTO(out, rc); @@ -1489,7 +1489,7 @@ rebuild_task_ult(void *arg) { struct rebuild_task *task = arg; struct ds_pool *pool; - uint32_t global_ver = 0; + uint32_t map_dist_ver = 0; struct rebuild_global_pool_tracker *rgt = NULL; d_rank_t myrank; uint64_t cur_ts = 0; @@ -1514,20 +1514,20 @@ rebuild_task_ult(void *arg) /* Check if the leader pool map has been synced to all other targets * to avoid -DER_GRP error. 
*/ - rc = ds_pool_svc_global_map_version_get(task->dst_pool_uuid, &global_ver); + rc = ds_pool_svc_query_map_dist(task->dst_pool_uuid, &map_dist_ver, NULL); if (rc) { - D_ERROR("Get pool service version failed: "DF_RC"\n", - DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": failed to get pool map distribution version", + DP_UUID(task->dst_pool_uuid)); D_GOTO(out_pool, rc); } - D_DEBUG(DB_REBUILD, "global_ver %u map ver %u\n", global_ver, + D_DEBUG(DB_REBUILD, "map_dist_ver %u map ver %u\n", map_dist_ver, task->dst_map_ver); if (pool->sp_stopping) D_GOTO(out_pool, rc = -DER_SHUTDOWN); - if (pool->sp_map_version <= global_ver) + if (pool->sp_map_version <= map_dist_ver) break; dss_sleep(1000); @@ -2373,17 +2373,20 @@ rebuild_prepare_one(void *data) struct ds_pool_child *dpc; int rc = 0; - pool_tls = rebuild_pool_tls_create(rpt->rt_pool_uuid, rpt->rt_poh_uuid, - rpt->rt_coh_uuid, rpt->rt_rebuild_ver, - rpt->rt_rebuild_gen); - if (pool_tls == NULL) - return -DER_NOMEM; - dpc = ds_pool_child_lookup(rpt->rt_pool_uuid); /* The pool child could be stopped */ if (dpc == NULL) return 0; + if (unlikely(dpc->spc_no_storage)) + D_GOTO(put, rc = 0); + + pool_tls = rebuild_pool_tls_create(rpt->rt_pool_uuid, rpt->rt_poh_uuid, + rpt->rt_coh_uuid, rpt->rt_rebuild_ver, + rpt->rt_rebuild_gen); + if (pool_tls == NULL) + D_GOTO(put, rc = -DER_NOMEM); + D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); /* Set the rebuild epoch per VOS container, so VOS aggregation will not @@ -2395,6 +2398,7 @@ rebuild_prepare_one(void *data) " rebuild eph "DF_X64" "DF_RC"\n", DP_UUID(rpt->rt_pool_uuid), DP_UUID(rpt->rt_coh_uuid), rpt->rt_rebuild_fence, DP_RC(rc)); +put: ds_pool_child_put(dpc); return rc; diff --git a/src/rsvc/rpc.h b/src/rsvc/rpc.h index 8f8fd658a88..594fef20b09 100644 --- a/src/rsvc/rpc.h +++ b/src/rsvc/rpc.h @@ -52,7 +52,9 @@ extern struct crt_proto_format rsvc_proto_fmt; ((d_iov_t) (sai_svc_id) CRT_VAR) \ ((uuid_t) (sai_db_uuid) CRT_VAR) \ ((uint32_t) (sai_class) CRT_VAR) \ + ((uint32_t) 
(sai_mode) CRT_VAR) \ ((uint32_t) (sai_flags) CRT_VAR) \ + ((uint32_t) (sai_padding) CRT_VAR) \ ((uint64_t) (sai_size) CRT_VAR) \ ((uint64_t) (sai_term) CRT_VAR) \ ((d_rank_list_t) (sai_ranks) CRT_PTR) diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 97a03f1f013..c43394cc8c2 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -407,6 +407,8 @@ init_map_distd(struct ds_rsvc *svc) D_ASSERT(svc->s_map_distd == ABT_THREAD_NULL); svc->s_map_dist = false; + svc->s_map_dist_inp = false; + svc->s_map_dist_ver = 0; svc->s_map_distd_stop = false; ds_rsvc_get(svc); @@ -609,37 +611,53 @@ map_distd(void *arg) struct ds_rsvc *svc = arg; D_DEBUG(DB_MD, "%s: start\n", svc->s_name); + ABT_mutex_lock(svc->s_mutex); for (;;) { - bool stop; - int rc; + uint32_t version; + int rc; - ABT_mutex_lock(svc->s_mutex); for (;;) { - stop = svc->s_map_distd_stop; - if (stop) - break; + if (svc->s_map_distd_stop) + goto break_out; if (svc->s_map_dist) { + /* Dequeue the request and start serving it. */ svc->s_map_dist = false; + svc->s_map_dist_inp = true; break; } sched_cond_wait(svc->s_map_dist_cv, svc->s_mutex); } ABT_mutex_unlock(svc->s_mutex); - if (stop) - break; - rc = rsvc_class(svc->s_class)->sc_map_dist(svc); + + rc = rsvc_class(svc->s_class)->sc_map_dist(svc, &version); if (rc != 0) { /* * Try again, but back off a little bit to limit the * retry rate. */ - svc->s_map_dist = true; dss_sleep(3000 /* ms */); } + + ABT_mutex_lock(svc->s_mutex); + /* Stop serving the request. */ + svc->s_map_dist_inp = false; + if (rc == 0) { + if (version > svc->s_map_dist_ver) { + D_DEBUG(DB_MD, "%s: version=%u->%u\n", svc->s_name, + svc->s_map_dist_ver, version); + svc->s_map_dist_ver = version; + } + ABT_cond_broadcast(svc->s_map_dist_cv); + } else { + /* Enqueue the request again. 
*/ + svc->s_map_dist = true; + } } +break_out: + ABT_mutex_unlock(svc->s_mutex); put_leader(svc); - ds_rsvc_put(svc); D_DEBUG(DB_MD, "%s: stop\n", svc->s_name); + ds_rsvc_put(svc); } /** @@ -656,6 +674,60 @@ ds_rsvc_request_map_dist(struct ds_rsvc *svc) D_DEBUG(DB_MD, "%s: requested map distribution\n", svc->s_name); } +/** + * Query the map distribution state. + * + * \param[in] svc replicated service + * \param[out] version if not NULL, highest map version distributed + * successfully + * \param[out] idle if not NULL, whether map distribution is idle (i.e., no + * in-progress or pending request) + */ +void +ds_rsvc_query_map_dist(struct ds_rsvc *svc, uint32_t *version, bool *idle) +{ + if (version != NULL) + *version = svc->s_map_dist_ver; + if (idle != NULL) + *idle = !svc->s_map_dist_inp && !svc->s_map_dist; +} + +/** + * Wait until map distribution is idle or stopping. + * + * \param[in] svc replicated service + */ +void +ds_rsvc_wait_map_dist(struct ds_rsvc *svc) +{ + D_DEBUG(DB_MD, "%s: begin", svc->s_name); + ABT_mutex_lock(svc->s_mutex); + for (;;) { + if (svc->s_map_distd_stop) + break; + if (!svc->s_map_dist && !svc->s_map_dist_inp) + break; + sched_cond_wait(svc->s_map_dist_cv, svc->s_mutex); + } + ABT_mutex_unlock(svc->s_mutex); + D_DEBUG(DB_MD, "%s: end", svc->s_name); +} + +static char * +start_mode_str(enum ds_rsvc_start_mode mode) +{ + switch (mode) { + case DS_RSVC_START: + return "start"; + case DS_RSVC_CREATE: + return "create"; + case DS_RSVC_DICTATE: + return "dictate"; + default: + return "unknown"; + } +} + static bool self_only(d_rank_list_t *replicas) { @@ -664,8 +736,9 @@ self_only(d_rank_list_t *replicas) } static int -start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, bool create, - size_t size, d_rank_list_t *replicas, void *arg, struct ds_rsvc **svcp) +start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, + enum ds_rsvc_start_mode mode, size_t size, d_rank_list_t *replicas, void 
*arg, + struct ds_rsvc **svcp) { struct rdb_storage *storage; struct ds_rsvc *svc = NULL; @@ -676,7 +749,7 @@ start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, b goto err; svc->s_ref++; - if (create) + if (mode == DS_RSVC_CREATE) rc = rdb_create(svc->s_db_path, svc->s_db_uuid, term, size, replicas, &rsvc_rdb_cbs, svc, &storage); else @@ -684,11 +757,17 @@ start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, b if (rc != 0) goto err_svc; + if (mode == DS_RSVC_DICTATE) { + rc = rdb_dictate(storage); + if (rc != 0) + goto err_storage; + } + rc = rdb_start(storage, &svc->s_db); if (rc != 0) goto err_storage; - if (create && self_only(replicas) && + if (mode == DS_RSVC_CREATE && self_only(replicas) && rsvc_class(class)->sc_bootstrap != NULL) { rc = bootstrap_self(svc, arg); if (rc != 0) @@ -702,7 +781,7 @@ start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, b rdb_stop(svc->s_db, &storage); err_storage: rdb_close(storage); - if (create) + if (mode == DS_RSVC_CREATE) rdb_destroy(svc->s_db_path, svc->s_db_uuid); err_svc: svc->s_ref--; @@ -794,16 +873,16 @@ ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id) } /** - * Start a replicated service. If \a create is false, all remaining input - * parameters are ignored; otherwise, create the replica first. If \a replicas - * is NULL, all remaining input parameters are ignored; otherwise, bootstrap - * the replicated service. + * Start a replicated service. If \a mode is not DS_RSVC_CREATE, all remaining + * input parameters are ignored; otherwise, create the replica first. If \a + * replicas is NULL, all remaining input parameters are ignored; otherwise, + * bootstrap the replicated service. 
* * \param[in] class replicated service class * \param[in] id replicated service ID * \param[in] db_uuid DB UUID * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) - * \param[in] create whether to create the replica before starting + * \param[in] mode mode of starting the replicated service * \param[in] size replica size in bytes * \param[in] replicas optional initial membership * \param[in] arg argument for cbs.sc_bootstrap @@ -814,7 +893,7 @@ ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id) */ int ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t caller_term, - bool create, size_t size, d_rank_list_t *replicas, void *arg) + enum ds_rsvc_start_mode mode, size_t size, d_rank_list_t *replicas, void *arg) { struct ds_rsvc *svc = NULL; d_list_t *entry; @@ -826,25 +905,35 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t if (entry != NULL) { svc = rsvc_obj(entry); D_DEBUG(DB_MD, "%s: found: stop=%d\n", svc->s_name, svc->s_stop); - if (caller_term != RDB_NIL_TERM) { - rc = rdb_ping(svc->s_db, caller_term); - if (rc != 0) { - D_CDEBUG(rc == -DER_STALE, DB_MD, DLOG_ERR, - "%s: failed to ping local replica\n", svc->s_name); - ds_rsvc_put(svc); - goto out; + if (mode == DS_RSVC_DICTATE && !svc->s_stop) { + /* + * If we need to dictate, and the service is not + * stopping, then stop it, which should not fail in + * this case, and continue. 
+ */ + rc = ds_rsvc_stop(class, id, caller_term, false /* destroy */); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); + ds_rsvc_put(svc); + } else { + if (caller_term != RDB_NIL_TERM) { + rc = rdb_ping(svc->s_db, caller_term); + if (rc != 0) { + D_CDEBUG(rc == -DER_STALE, DB_MD, DLOG_ERR, + "%s: failed to ping local replica\n", svc->s_name); + ds_rsvc_put(svc); + goto out; + } } + if (svc->s_stop) + rc = -DER_CANCELED; + else + rc = -DER_ALREADY; + ds_rsvc_put(svc); + goto out; } - D_ASSERT(!svc->s_destroy); - if (svc->s_stop) - rc = -DER_CANCELED; - else - rc = -DER_ALREADY; - ds_rsvc_put(svc); - goto out; } - rc = start(class, id, db_uuid, caller_term, create, size, replicas, arg, &svc); + rc = start(class, id, db_uuid, caller_term, mode, size, replicas, arg, &svc); if (rc != 0) goto out; @@ -852,19 +941,32 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t &svc->s_entry, true /* exclusive */); if (rc != 0) { D_DEBUG(DB_MD, "%s: insert: "DF_RC"\n", svc->s_name, DP_RC(rc)); - stop(svc, create /* destroy */); + stop(svc, mode == DS_RSVC_CREATE /* destroy */); goto out; } D_DEBUG(DB_MD, "%s: started replicated service\n", svc->s_name); ds_rsvc_put(svc); out: - if (rc != 0 && rc != -DER_ALREADY && !(create && rc == -DER_EXIST)) - D_ERROR("Failed to start replicated service: "DF_RC"\n", - DP_RC(rc)); + if (rc != 0 && rc != -DER_ALREADY && !(mode == DS_RSVC_CREATE && rc == -DER_EXIST)) + D_ERROR("Failed to start replicated service: "DF_RC"\n", DP_RC(rc)); return rc; } +static int +remove_path(char *path) +{ + int rc; + + rc = remove(path); + if (rc != 0) { + rc = errno; + D_CDEBUG(rc == ENOENT, DB_MD, DLOG_ERR, "failed to remove %s: %d\n", path, rc); + return daos_errno2der(rc); + } + return 0; +} + static int stop(struct ds_rsvc *svc, bool destroy) { @@ -903,15 +1005,14 @@ stop(struct ds_rsvc *svc, bool destroy) } /** - * Stop a replicated service. 
If destroy is false, all remaining parameters are - * ignored; otherwise, destroy the service afterward. + * Stop a replicated service. If destroy is true, destroy the service + * afterward. * * \param[in] class replicated service class * \param[in] id replicated service ID * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) * \param[in] destroy whether to destroy the replica after stopping * - * \retval -DER_ALREADY replicated service already stopped * \retval -DER_CANCELED replicated service stopping * \retval -DER_STALE stale \a caller_term */ @@ -924,8 +1025,20 @@ ds_rsvc_stop(enum ds_rsvc_class_id class, d_iov_t *id, uint64_t caller_term, boo D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); rc = ds_rsvc_lookup(class, id, &svc); - if (rc != 0) - return -DER_ALREADY; + if (rc != 0) { + if (rc != -DER_NOTREPLICA && destroy) { + char *path; + + rc = rsvc_class(class)->sc_locate(id, &path); + if (rc != 0) + return rc; + rc = remove_path(path); + D_FREE(path); + if (rc != 0 && rc != -DER_NONEXIST) + return rc; + } + return 0; + } if (caller_term != RDB_NIL_TERM) { rc = rdb_ping(svc->s_db, caller_term); @@ -1043,7 +1156,7 @@ ds_rsvc_add_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, size_t size) int rc; rc = ds_rsvc_dist_start(svc->s_class, &svc->s_id, svc->s_db_uuid, ranks, svc->s_term, - true /* create */, false /* bootstrap */, size); + DS_RSVC_CREATE, false /* bootstrap */, size); /* TODO: Attempt to only add replicas that were successfully started */ if (rc != 0) @@ -1126,8 +1239,7 @@ ds_rsvc_remove_replicas(enum ds_rsvc_class_id class, d_iov_t *id, /*************************** Distributed Operations ***************************/ enum rdb_start_flag { - RDB_AF_CREATE = 0x1, - RDB_AF_BOOTSTRAP = 0x2 + RDB_AF_BOOTSTRAP = 0x1, }; enum rdb_stop_flag { @@ -1157,23 +1269,24 @@ bcast_create(crt_opcode_t opc, bool filter_invert, d_rank_list_t *filter_ranks, } /** - * Perform a distributed create, if \a create is true, and start operation on - * 
all replicas of a database with \a dbid spanning \a ranks. This method can - * be called on any rank. If \a create is false, \a ranks may be NULL. + * Perform a distributed start operation in \a mode on all replicas of a + * database with \a dbid spanning \a ranks. This method can be called on any + * rank. If \a mode is DS_RSVC_START, \a ranks may be NULL. If \a mode is + * DS_RSVC_DICTATE, \a ranks must comprise one and only one rank. * * \param[in] class replicated service class * \param[in] id replicated service ID * \param[in] dbid database UUID * \param[in] ranks list of replica ranks * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) - * \param[in] create create replicas first - * \param[in] bootstrap start with an initial list of replicas - * \param[in] size size of each replica in bytes if \a create + * \param[in] mode mode of starting the replicated service + * \param[in] bootstrap create with an initial list of replicas if \a mode is DS_RSVC_CREATE + * \param[in] size size of each replica in bytes if \a mode is DS_RSVC_CREATE */ int ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, - const d_rank_list_t *ranks, uint64_t caller_term, bool create, bool bootstrap, - size_t size) + const d_rank_list_t *ranks, uint64_t caller_term, enum ds_rsvc_start_mode mode, + bool bootstrap, size_t size) { crt_rpc_t *rpc; struct rsvc_start_in *in; @@ -1181,8 +1294,8 @@ ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, int rc; D_ASSERT(!bootstrap || ranks != NULL); - D_DEBUG(DB_MD, DF_UUID": %s DB\n", - DP_UUID(dbid), create ? 
"creating" : "starting"); + D_ASSERT(mode != DS_RSVC_DICTATE || ranks->rl_nr == 1); + D_DEBUG(DB_MD, DF_UUID": %s DB\n", DP_UUID(dbid), start_mode_str(mode)); rc = bcast_create(RSVC_START, ranks != NULL /* filter_invert */, (d_rank_list_t *)ranks, &rpc); @@ -1194,9 +1307,8 @@ ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, if (rc != 0) goto out_rpc; uuid_copy(in->sai_db_uuid, dbid); - if (create) - in->sai_flags |= RDB_AF_CREATE; - if (bootstrap) + in->sai_mode = mode; + if (mode == DS_RSVC_CREATE && bootstrap) in->sai_flags |= RDB_AF_BOOTSTRAP; in->sai_size = size; in->sai_term = caller_term; @@ -1209,10 +1321,11 @@ ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, out = crt_reply_get(rpc); rc = out->sao_rc; if (rc != 0) { - D_ERROR(DF_UUID": failed to start%s %d replicas: "DF_RC"\n", - DP_UUID(dbid), create ? "/create" : "", rc, - DP_RC(out->sao_rc_errval)); - ds_rsvc_dist_stop(class, id, ranks, NULL, caller_term, create); + D_ERROR(DF_UUID": failed to %s %d replicas: "DF_RC"\n", DP_UUID(dbid), + start_mode_str(mode), rc, DP_RC(out->sao_rc_errval)); + if (ranks == NULL || ranks->rl_nr > 1) + ds_rsvc_dist_stop(class, id, ranks, NULL, caller_term, + mode == DS_RSVC_CREATE); rc = out->sao_rc_errval; } @@ -1229,7 +1342,6 @@ ds_rsvc_start_handler(crt_rpc_t *rpc) { struct rsvc_start_in *in = crt_req_get(rpc); struct rsvc_start_out *out = crt_reply_get(rpc); - bool create = in->sai_flags & RDB_AF_CREATE; bool bootstrap = in->sai_flags & RDB_AF_BOOTSTRAP; int rc; @@ -1238,8 +1350,15 @@ ds_rsvc_start_handler(crt_rpc_t *rpc) goto out; } - rc = ds_rsvc_start(in->sai_class, &in->sai_svc_id, in->sai_db_uuid, in->sai_term, create, - in->sai_size, bootstrap ? 
in->sai_ranks : NULL, NULL /* arg */); + if (in->sai_mode == DS_RSVC_DICTATE && + (in->sai_ranks == NULL || in->sai_ranks->rl_nr != 1)) { + rc = -DER_PROTO; + goto out; + } + + rc = ds_rsvc_start(in->sai_class, &in->sai_svc_id, in->sai_db_uuid, in->sai_term, + in->sai_mode, in->sai_size, bootstrap ? in->sai_ranks : NULL, + NULL /* arg */); if (rc == -DER_ALREADY) rc = 0; diff --git a/src/tests/ftest/SConscript b/src/tests/ftest/SConscript index 32cfd473e6f..2f2c8b2266a 100644 --- a/src/tests/ftest/SConscript +++ b/src/tests/ftest/SConscript @@ -18,7 +18,7 @@ def scons(): 'daos_perf', 'daos_racer', 'daos_vol', 'daos_test', 'data', 'fault_domain', 'io', 'ior', 'mdtest', 'network', 'nvme', 'mpiio', - 'object', 'osa', 'pool', 'rebuild', 'security', + 'object', 'osa', 'pool', 'rebuild', 'recovery', 'security', 'server', 'soak', 'erasurecode', 'datamover', 'scripts', 'dbench', 'harness', 'telemetry', 'deployment', 'performance', diff --git a/src/tests/ftest/daos_test/dfuse.yaml b/src/tests/ftest/daos_test/dfuse.yaml index 0a6e5d72e1b..e7633dcfbbf 100644 --- a/src/tests/ftest/daos_test/dfuse.yaml +++ b/src/tests/ftest/daos_test/dfuse.yaml @@ -1,7 +1,7 @@ hosts: test_servers: 1 test_clients: 1 -timeout: 90 +timeout: 120 pool: scm_size: 1G server_config: diff --git a/src/tests/ftest/daos_test/suite.py b/src/tests/ftest/daos_test/suite.py index 5bb66798206..5ed8b3f22b5 100644 --- a/src/tests/ftest/daos_test/suite.py +++ b/src/tests/ftest/daos_test/suite.py @@ -45,6 +45,22 @@ def test_daos_management(self): """ self.run_subtest() + def test_daos_cat_recovery(self): + """Jira ID: DAOS-13047 + + Test Description: + Run daos_test -F + + Use cases: + Core tests for daos_test + + :avocado: tags=all,pr,daily_regression + :avocado: tags=hw,medium,provider + :avocado: tags=daos_test,daos_core_test + :avocado: tags=DaosCoreTest,test_daos_cat_recovery + """ + self.run_subtest() + def test_daos_pool(self): """Jira ID: DAOS-1568 diff --git a/src/tests/ftest/daos_test/suite.yaml 
b/src/tests/ftest/daos_test/suite.yaml index c1bc395c2b0..164a3a9bd43 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -9,6 +9,7 @@ timeout: 600 timeouts: test_daos_degraded_mode: 450 test_daos_management: 110 + test_daos_cat_recovery: 5400 test_daos_pool: 180 test_daos_container: 510 test_daos_epoch: 125 @@ -84,6 +85,7 @@ daos_tests: num_clients: test_daos_degraded_mode: 1 test_daos_management: 1 + test_daos_cat_recovery: 1 test_daos_pool: 2 test_daos_container: 1 test_daos_epoch: 1 @@ -113,6 +115,7 @@ daos_tests: test_name: test_daos_degraded_mode: DAOS_Degraded_Mode test_daos_management: DAOS_Management + test_daos_cat_recovery: DAOS_Cat_Recovery test_daos_pool: DAOS_Pool test_daos_container: DAOS_Container test_daos_epoch: DAOS_Epoch @@ -142,6 +145,7 @@ daos_tests: daos_test: test_daos_degraded_mode: d test_daos_management: m + test_daos_cat_recovery: F test_daos_pool: p test_daos_container: c test_daos_epoch: e diff --git a/src/tests/ftest/recovery/container_cleanup.py b/src/tests/ftest/recovery/container_cleanup.py new file mode 100644 index 00000000000..79037ea9987 --- /dev/null +++ b/src/tests/ftest/recovery/container_cleanup.py @@ -0,0 +1,130 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import time + +from exception_utils import CommandFailure +from general_utils import report_errors +from recovery_test_base import RecoveryTestBase + + +class ContainerCleanupTest(RecoveryTestBase): + """Test Pass 5: Container Cleanup + + :avocado: recursive + """ + + def test_container_label_inconsistency(self): + """Test container label inconsistency in CS and property. + + 1. Create a pool and a container. + 2. Inject fault to cause container label inconsistency. i.e., Corrupt label in CS. + 3. Try to access property using the original label in CS. + 4. Show that PS has the original label by using "new-label", which was injected by + the fault injection tool. + 5. 
Enable the checker. + 6. Set policy to --all-interactive. + 7. Start the checker and query the checker until the fault is detected. + 8. Repair by selecting "Trust the container label in container property." + 9. Query the checker until the fault is repaired. + 10. Disable the checker. + 11. Verify that the inconsistency was fixed. + + Jira ID: DAOS-12289 + + :avocado: tags=all,pr + :avocado: tags=vm + :avocado: tags=recovery,container_cleanup + :avocado: tags=ContainerCleanupTest,test_container_label_inconsistency + """ + # 1. Create a pool and a container. + self.log_step("Create a pool and a container") + pool = self.get_pool(connect=False) + container = self.get_container(pool=pool) + + # 2. Inject fault to cause container label inconsistency. + self.log_step("Inject fault to cause container label inconsistency.") + daos_command = self.get_daos_command() + daos_command.faults_container( + pool=pool.identifier, cont=container.identifier, + location="DAOS_CHK_CONT_BAD_LABEL") + + # 3. Try to access property using the original label in CS. + self.log_step("Try to access property using the original label in CS.") + try: + _ = daos_command.container_get_prop( + pool=pool.identifier, cont=container.identifier) + except CommandFailure: + pass + else: + self.fail("Label inconsistency fault wasn't injected property!") + + # 4. Show that PS has the original label by using "new-label", which was injected + # by the fault injection tool. + cont_prop = daos_command.container_get_prop( + pool=pool.identifier, cont="new-label", properties=["label"]) + ps_label = cont_prop["response"][0]["value"] + errors = [] + if ps_label != container.identifier: + msg = (f"Unexpected label in PS before repair! Expected = " + f"{container.identifier}; Actual = {ps_label}") + errors.append(msg) + + # 5. Enable the checker. + self.log_step("Enable the checker.") + dmg_command = self.get_dmg_command() + dmg_command.check_enable() + + # 6. Set policy to --all-interactive. 
+ self.log_step("Set policy to --all-interactive.") + dmg_command.check_set_policy(all_interactive=True) + + # 7. Start the checker and query the checker until the fault is detected. + self.log_step("Start and query the checker until the fault is detected.") + seq_num = None + # Start checker. + dmg_command.check_start() + # Query the checker until it is running and has reported an inconsistency. + for _ in range(8): + check_query_out = dmg_command.check_query() + # Status is INIT before starting the checker. + if check_query_out["response"]["status"] == "RUNNING" and\ + check_query_out["response"]["reports"]: + seq_num = check_query_out["response"]["reports"][0]["seq"] + break + time.sleep(5) + if not seq_num: + self.fail("Checker didn't detect any fault!") + + # 8. Repair by selecting "Trust the container label in container property." + self.log_step( + 'Repair by selecting "Trust the container label in container property."') + dmg_command.check_repair(seq_num=seq_num, action=2) + + # 9. Query the checker until the fault is repaired. + self.log_step("Query the checker until the fault is repaired.") + repair_report = self.wait_for_check_complete()[0] + + # Verify that the repair report has expected message "Update the CS label". + action_message = repair_report["act_msgs"][0] + exp_msg = "Update the CS label" + if exp_msg not in action_message: + errors.append(f"{exp_msg} not in {action_message}!") + + # 10. Disable the checker. + self.log_step("Disable the checker.") + dmg_command.check_disable() + + # 11. Verify that the inconsistency was fixed. + self.log_step("Verify that the inconsistency was fixed.") + cont_prop = daos_command.container_get_prop( + pool=pool.identifier, cont=container.identifier, properties=["label"]) + ps_label = cont_prop["response"][0]["value"] + if ps_label != container.identifier: + msg = (f"Unexpected label in PS after repair! 
Expected = " + f"{container.identifier}; Actual = {ps_label}") + errors.append(msg) + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/container_cleanup.yaml b/src/tests/ftest/recovery/container_cleanup.yaml new file mode 100644 index 00000000000..aa12f2885bb --- /dev/null +++ b/src/tests/ftest/recovery/container_cleanup.yaml @@ -0,0 +1,24 @@ +hosts: + test_servers: 1 + +timeout: 360 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +pool: + size: 5G + +container: + type: POSIX + control_method: daos diff --git a/src/tests/ftest/recovery/container_list_consolidation.py b/src/tests/ftest/recovery/container_list_consolidation.py new file mode 100644 index 00000000000..ea5a3605257 --- /dev/null +++ b/src/tests/ftest/recovery/container_list_consolidation.py @@ -0,0 +1,156 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import re +import time + +from ClusterShell.NodeSet import NodeSet +from ddb_utils import DdbCommand +from exception_utils import CommandFailure +from general_utils import report_errors +from recovery_test_base import RecoveryTestBase + + +class ContainerListConsolidationTest(RecoveryTestBase): + """Test Pass 4: Container List Consolidation + + :avocado: recursive + """ + + def test_orphan_container(self): + """Test orphan container. Container is in shard, but not in PS. + + 1. Create a pool and a container. + 2. Inject fault to cause orphan container. i.e., container is left in the system, + but doesn't appear with daos commands. + 3. Check that the container doesn't appear with daos command. + 4. Stop servers. + 5. Use ddb to verify that the container is left in shards. + 6. Enable the checker. + 7. Set policy to --all-interactive. + 8. Start the checker and query the checker until the fault is detected. + 9. 
Repair by selecting the destroy option. + 10. Query the checker until the fault is repaired. + 11. Disable the checker. + 12. Run the ddb command and verify that the container is removed from shard. + + Jira ID: DAOS-12287 + + :avocado: tags=all,pr + :avocado: tags=vm + :avocado: tags=recovery,container_list_consolidation + :avocado: tags=ContainerListConsolidationTest,test_orphan_container + """ + # 1. Create a pool and a container. + self.log_step("Create a pool and a container") + pool = self.get_pool(connect=False) + container = self.get_container(pool=pool) + + # 2. Inject fault to cause orphan container. + self.log_step("Inject fault to cause orphan container.") + daos_command = self.get_daos_command() + daos_command.faults_container( + pool=pool.identifier, cont=container.identifier, + location="DAOS_CHK_CONT_ORPHAN") + + # 3. Check that the container doesn't appear with daos command. + self.log_step("Check that the container doesn't appear with daos command.") + pool_list = daos_command.pool_list_containers(pool=pool.identifier) + errors = [] + if pool_list["response"]: + errors.append(f"Container appears with daos command! {pool_list}") + + # 4. Stop servers. + self.log_step("Stop servers.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 5. Use ddb to verify that the container is left in shards. 
+ self.log_step("Use ddb to verify that the container is left in shards.") + scm_mount = self.server_managers[0].get_config_value("scm_mount") + ddb_command = DdbCommand( + server_host=NodeSet(self.hostlist_servers[0]), path=self.bin, + mount_point=scm_mount, pool_uuid=pool.uuid, + vos_file=self.get_vos_file_path(pool=pool)) + cmd_result = ddb_command.list_component() + ls_out = "\n".join(cmd_result[0]["stdout"]) + uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" + match = re.search(uuid_regex, ls_out) + if match is None: + self.fail("Unexpected output from ddb command, unable to parse.") + self.log.info("Container UUID from ddb ls = %s", match.group(1)) + + # UUID is found. Verify that it's the container UUID of the container we created. + actual_uuid = match.group(1) + expected_uuid = container.uuid.lower() + if actual_uuid != expected_uuid: + msg = "Unexpected container UUID! Expected = {}; Actual = {}".format( + expected_uuid, actual_uuid) + errors.append(msg) + + # 6. Enable the checker. + self.log_step("Enable the checker.") + dmg_command.check_enable(stop=False) + + # 7. Set policy to --all-interactive. + self.log_step("Set policy to --all-interactive.") + dmg_command.check_set_policy(all_interactive=True) + + # 8. Start the checker and query the checker until the fault is detected. + self.log_step("Start and query the checker until the fault is detected.") + seq_num = None + # Start checker. + dmg_command.check_start() + # Query the checker until it is running and has reported an inconsistency. + for _ in range(8): + check_query_out = dmg_command.check_query() + # Status is INIT before starting the checker. + if check_query_out["response"]["status"] == "RUNNING" and\ + check_query_out["response"]["reports"]: + seq_num = check_query_out["response"]["reports"][0]["seq"] + break + time.sleep(5) + if not seq_num: + self.fail("Checker didn't detect any fault!") + + # 9. Repair by selecting the destroy option, 0. 
+ msg = ("Repair with option 0; Destroy the orphan container to release space " + "[suggested].") + self.log_step(msg) + dmg_command.check_repair(seq_num=seq_num, action=0) + + # 10. Query the checker until the fault is repaired. + self.log_step("Query the checker until the fault is repaired.") + repair_report = self.wait_for_check_complete()[0] + + # Verify that the repair report has expected message "Discard the container". + action_message = repair_report["act_msgs"][0] + exp_msg = "Discard the container" + errors = [] + if exp_msg not in action_message: + errors.append(f"{exp_msg} not in {action_message}!") + + # 11. Disable the checker. + self.log_step("Disable the checker.") + dmg_command.check_disable(start=False) + + # 12. Run the ddb command and verify that the container is removed from shard. + self.log_step( + "Run the ddb command and verify that the container is removed from shard.") + cmd_result = ddb_command.list_component() + ls_out = "\n".join(cmd_result[0]["stdout"]) + uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" + match = re.search(uuid_regex, ls_out) + if match: + errors.append("Container UUID is found in shard! Checker didn't remove it.") + + # Start server to prepare for the cleanup. + try: + dmg_command.system_start() + except CommandFailure as error: + # Handle the potential system start error just in case. 
+ self.log.error(error) + finally: + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/container_list_consolidation.yaml b/src/tests/ftest/recovery/container_list_consolidation.yaml new file mode 100644 index 00000000000..aa12f2885bb --- /dev/null +++ b/src/tests/ftest/recovery/container_list_consolidation.yaml @@ -0,0 +1,24 @@ +hosts: + test_servers: 1 + +timeout: 360 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +pool: + size: 5G + +container: + type: POSIX + control_method: daos diff --git a/src/tests/ftest/recovery/ddb.py b/src/tests/ftest/recovery/ddb.py new file mode 100644 index 00000000000..784d13b70a4 --- /dev/null +++ b/src/tests/ftest/recovery/ddb.py @@ -0,0 +1,642 @@ +""" + (C) Copyright 2022-2024 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import ctypes +import os +import re + +from ClusterShell.NodeSet import NodeSet +from ddb_utils import DdbCommand +from exception_utils import CommandFailure +from general_utils import (DaosTestError, create_string_buffer, distribute_files, + get_clush_command, get_random_string, report_errors, run_command) +from pydaos.raw import DaosObjClass, IORequest +from recovery_test_base import RecoveryTestBase + + +def insert_objects(context, container, object_count, dkey_count, akey_count, base_dkey, + base_akey, base_data): + """Insert objects, dkeys, akeys, and data into the container. + + Args: + context (DaosContext): + container (TestContainer): Container to insert objects. + object_count (int): Number of objects to insert. + dkey_count (int): Number of dkeys to insert. + akey_count (int): Number of akeys to insert. + base_dkey (str): Base dkey. Index numbers will be appended to it. + base_akey (str):Base akey. Index numbers will be appended to it. + base_data (str):Base data that goes inside akey. 
Index numbers will be appended + to it. + + Returns: + tuple: Inserted objects, dkeys, akeys, and data as (ioreqs, dkeys, akeys, + data_list) + + """ + ioreqs = [] + dkeys = [] + akeys = [] + data_list = [] + + container.open() + + for obj_index in range(object_count): + # Insert object. + ioreqs.append(IORequest( + context=context, container=container.container, obj=None, + objtype=DaosObjClass.OC_S1)) + + for dkey_index in range(dkey_count): + # Prepare the dkey to insert into the object. + dkey_str = " ".join( + [base_dkey, str(obj_index), str(dkey_index)]).encode("utf-8") + dkeys.append(create_string_buffer(value=dkey_str, size=len(dkey_str))) + + for akey_index in range(akey_count): + # Prepare the akey to insert into the dkey. + akey_str = " ".join( + [base_akey, str(obj_index), str(dkey_index), + str(akey_index)]).encode("utf-8") + akeys.append(create_string_buffer(value=akey_str, size=len(akey_str))) + + # Prepare the data to insert into the akey. + data_str = " ".join( + [base_data, str(obj_index), str(dkey_index), + str(akey_index)]).encode("utf-8") + data_list.append(create_string_buffer(value=data_str, size=len(data_str))) + c_size = ctypes.c_size_t(ctypes.sizeof(data_list[-1])) + + # Insert dkeys, akeys, and the data. + ioreqs[-1].single_insert( + dkey=dkeys[-1], akey=akeys[-1], value=data_list[-1], size=c_size) + + return (ioreqs, dkeys, akeys, data_list) + + +def copy_remote_to_local(remote_file_path, test_dir, remote): + """Copy the given file from the server node to the local test node and retrieve + the original name. + + Args: + remote_file_path (str): File path to copy to local. + test_dir (str): Test directory. Usually self.test_dir. + remote (str): Remote hostname to copy file from. + """ + # Use clush --rcopy to copy the file from the remote server node to the local test + # node. clush will append . to the file when copying. 
+ args = "--rcopy {} --dest {}".format(remote_file_path, test_dir) + clush_command = get_clush_command(hosts=remote, args=args) + try: + run_command(command=clush_command) + except DaosTestError as error: + print("ERROR: Copying {} from {}: {}".format(remote_file_path, remote, error)) + raise error + + # Remove the appended ".{remote}" hostname suffix from the copied file. + current_file_path = "".join([remote_file_path, ".", remote]) + mv_command = "mv {} {}".format(current_file_path, remote_file_path) + try: + run_command(command=mv_command) + except DaosTestError as error: + print( + "ERROR: Moving {} to {}: {}".format( + current_file_path, remote_file_path, error)) + raise error + + +class DdbTest(RecoveryTestBase): + """Test ddb subcommands. + + :avocado: recursive + """ + + def __init__(self, *args, **kwargs): + """Initialize a DdbTest object.""" + super().__init__(*args, **kwargs) + # how many objects and keys to insert/expect + self.object_count = 5 + self.dkey_count = 2 + self.akey_count = 1 + # Generate random keys and data to insert into the object. + self.random_dkey = get_random_string(10) + self.random_akey = get_random_string(10) + self.random_data = get_random_string(10) + + def test_recovery_ddb_ls(self): + """Test ddb ls. + + 1. Verify container UUID. + 2. Verify object count in the container. + 3. Verify there are two dkeys for every object. Also verify the dkey string and + the size. + 4. Verify there is one akey for every dkey. Also verify the key string and the + size. + 5. Restart the server for the cleanup. + 6. Reset the container and the pool to prepare for the cleanup. + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_ls + """ + # Create a pool and a container. + self.add_pool() + self.add_container(pool=self.pool) + + # Find the vos file name. e.g., /mnt/daos0/{pool_uuid}/vos-0. 
+ scm_mount = self.server_managers[0].get_config_value("scm_mount") + ddb_command = DdbCommand( + server_host=NodeSet(self.hostlist_servers[0]), path=self.bin, + mount_point=scm_mount, pool_uuid=self.pool.uuid, + vos_file=self.get_vos_file_path(pool=self.pool)) + + errors = [] + + object_count = self.object_count + dkey_count = self.dkey_count + akey_count = self.akey_count + # Insert objects with API. + insert_objects( + context=self.context, container=self.container, object_count=object_count, + dkey_count=dkey_count, akey_count=akey_count, base_dkey=self.random_dkey, + base_akey=self.random_akey, base_data=self.random_data) + + # Need to stop the server to use ddb. + self.get_dmg_command().system_stop() + + # 1. Verify container UUID. + cmd_result = ddb_command.list_component() + # Sample output. + # Listing contents of '/' + # CONT: (/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f + # stdout is a list which contains each line as separate element. Concatenate them + # to single string so that we can apply regex. + ls_out = "\n".join(cmd_result[0]["stdout"]) + # Matches the container uuid + uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" + match = re.search(uuid_regex, ls_out) + if match is None: + self.fail("Unexpected output from ddb command, unable to parse.") + self.log.info("Container UUID from ddb ls = %s", match.group(1)) + + actual_uuid = match.group(1).lower() + expected_uuid = self.container.uuid.lower() + if actual_uuid != expected_uuid: + msg = "Unexpected container UUID! Expected = {}; Actual = {}".format( + expected_uuid, actual_uuid) + errors.append(msg) + + # 2. Verify object count in the container. + cmd_result = ddb_command.list_component(component_path="[0]") + # Sample output. 
+ # Listing contents of 'CONT: (/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f' + # OBJ: (/[0]/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643008.1.0.1 + # OBJ: (/[0]/[1]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643009.1.0.1 + # OBJ: (/[0]/[2]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643016.1.0.1 + ls_out = "\n".join(cmd_result[0]["stdout"]) + # Matches an object id. (4 digits separated by a period '.') + object_id_regex = r"\d+\.\d+\.\d+\.\d+" + match = re.findall(object_id_regex, ls_out) + self.log.info("List objects match = %s", match) + + actual_object_count = len(match) + if actual_object_count != object_count: + errors.append( + "Unexpected object count! Expected = {}; Actual = {}".format( + object_count, actual_object_count)) + + # 3. Verify there are two dkeys for every object. Also verify the dkey string and + # the size. + dkey_regex = f"/{uuid_regex}/{object_id_regex}/(.*)" + actual_dkey_count = 0 + for obj_index in range(object_count): + component_path = "[0]/[{}]".format(obj_index) + cmd_result = ddb_command.list_component(component_path=component_path) + ls_out = "\n".join(cmd_result[0]["stdout"]) + # Sample output. + # /d4e0c836-17bd-4df3-b255-929732486bab/281479271677953.0.0/ + # [0] 'Sample dkey 0 0' (15) + # [1] 'Sample dkey 0 1' (15) + match = re.findall(dkey_regex, ls_out) + + actual_dkey_count += len(match) + + # Verify dkey string. + for idx in range(self.dkey_count): + actual_dkey = match[idx][1] + if self.random_dkey not in actual_dkey: + msg = ("Unexpected dkey! obj_i = {}. Expected = {}; " + "Actual = {}").format(obj_index, self.random_dkey, actual_dkey) + errors.append(msg) + + # Verify there are two dkeys for every object. + expected_dkey_count = object_count * dkey_count + if actual_dkey_count != expected_dkey_count: + msg = "Unexpected number of dkeys! Expected = {}; Actual = {}".format( + expected_dkey_count, actual_dkey_count) + errors.append(msg) + + # 4. Verify there is one akey for every dkey. 
Also verify the key string and the + # size. + akey_count = 0 + for obj_index in range(object_count): + for dkey_index in range(dkey_count): + component_path = "[0]/[{}]/[{}]".format(obj_index, dkey_index) + cmd_result = ddb_command.list_component(component_path=component_path) + ls_out = "\n".join(cmd_result[0]["stdout"]) + msg = "List akeys obj_index = {}, dkey_index = {}, stdout = {}".format( + obj_index, dkey_index, ls_out) + self.log.info(msg) + # Output is in the same format as dkey, so use the same regex. + # /d4e0c836-17bd-4df3-b255-929732486bab/281479271677954.0.0/' + # Sample dkey 1 0'/ + # [0] 'Sample akey 1 0 0' (17) + match = re.findall(f"{dkey_regex}/(.*)", ls_out) + + akey_count += len(match) + + # Verify akey string. As in dkey, ignore the numbers at the end. + actual_akey = match[0][2] + if self.random_akey not in actual_akey: + msg = ("Unexpected akey! obj_index = {}; dkey_index = {}; " + "Expected = {}; Actual = {}").format( + obj_index, dkey_index, self.random_akey, actual_akey) + errors.append(msg) + + # Verify there is one akey for every dkey. + if expected_dkey_count != akey_count: + msg = "Unexpected number of akeys! Expected = {}; Actual = {}".format( + expected_dkey_count, akey_count) + errors.append(msg) + + # 5. Restart the server for the cleanup. + self.get_dmg_command().system_start() + + # 6. Reset the container and the pool to prepare for the cleanup. + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + self.get_dmg_command().system_start() + + self.log.info("##### Errors #####") + report_errors(test=self, errors=errors) + self.log.info("##################") + + def test_recovery_ddb_rm(self): + """Test rm. + + 1. Create a pool and a container. Insert objects, dkeys, and akeys. + 2. Stop the server to use ddb. + 3. Find the vos file name. e.g., /mnt/daos0//vos-0. + 4. Call ddb rm to remove the akey. + 5. Restart the server to use the API. + 6. 
Reset the object, container, and pool to use the API after server restart. + 7. Call list_akey() in pydaos API to verify that the akey was removed. + 8. Stop the server to use ddb. + 9. Call ddb rm to remove the dkey. + 10. Restart the server to use the API. + 11. Reset the object, container, and pool to use the API after server restart. + 12. Call list_dkey() in pydaos API to verify that the dkey was removed. + 13. Stop the server to use ddb. + 14. Call ddb rm to remove the object. + 15. Restart the server to use daos command. + 16. Reset the container and pool so that cleanup works. + 17. Call "daos container list-objects " to verify that the + object was removed. + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_rm + """ + # 1. Create a pool and a container. Insert objects, dkeys, and akeys. + self.add_pool(connect=True) + self.add_container(pool=self.pool) + + # Insert one object with one dkey and one akey with API. + obj_dataset = insert_objects( + context=self.context, container=self.container, object_count=1, + dkey_count=1, akey_count=2, base_dkey=self.random_dkey, + base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + dkeys_inserted = obj_dataset[1] + akeys_inserted = obj_dataset[2] + + # For debugging/reference, check that the dkey and the akey we just inserted are + # returned from the API. + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (before) = %s", akeys_api) + dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (before) = %s", dkeys_api) + + # For debugging/reference, check that the object was inserted using daos command. + list_obj_out = self.get_daos_command().container_list_objects( + pool=self.pool.identifier, cont=self.container.uuid) + self.log.info("Object list (before) = %s", list_obj_out["response"]) + + # 2. Need to stop the server to use ddb. 
+ dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 3. Find the vos file name. + vos_file = self.get_vos_file_path(pool=self.pool) + host = NodeSet(self.hostlist_servers[0]) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + ddb_command = DdbCommand( + server_host=host, path=self.bin, mount_point=scm_mount, + pool_uuid=self.pool.uuid, vos_file=vos_file) + + # 4. Call ddb rm to remove the akey. + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") + self.log.info("rm akey stdout = %s", cmd_result[0]["stdout"]) + + # 5. Restart the server to use the API. + dmg_command.system_start() + + # 6. Reset the object, container, and pool to use the API after server restart. + ioreqs[0].obj.close() + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + ioreqs[0].obj.open() + + # 7. Call list_akey() in pydaos API to verify that the akey was removed. + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (after) = %s", akeys_api) + + errors = [] + expected_len = len(akeys_inserted) - 1 + actual_len = len(akeys_api) + if actual_len != expected_len: + msg = ("Unexpected number of akeys after ddb rm! " + "Expected = {}; Actual = {}").format(expected_len, actual_len) + errors.append(msg) + + # 8. Stop the server to use ddb. + dmg_command.system_stop() + + # 9. Call ddb rm to remove the dkey. + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") + self.log.info("rm dkey stdout = %s", cmd_result[0]["stdout"]) + + # 10. Restart the server to use the API. + dmg_command.system_start() + + # 11. Reset the object, container, and pool to use the API after server restart. + ioreqs[0].obj.close() + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + ioreqs[0].obj.open() + + # 12. Call list_dkey() in pydaos API to verify that the dkey was removed. 
+ dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (after) = %s", dkeys_api) + + expected_len = len(dkeys_inserted) - 1 + actual_len = len(dkeys_api) + if actual_len != expected_len: + msg = ("Unexpected number of dkeys after ddb rm! " + "Expected = {}; Actual = {}").format(expected_len, actual_len) + errors.append(msg) + + # 13. Stop the server to use ddb. + dmg_command.system_stop() + + # 14. Call ddb rm to remove the object. + cmd_result = ddb_command.remove_component(component_path="[0]/[0]") + self.log.info("rm object stdout = %s", cmd_result[0]["stdout"]) + + # 15. Restart the server to use daos command. + dmg_command.system_start() + + # 16. Reset the container and pool so that cleanup works. + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + + # 17. Call "daos container list-objects " to verify that + # the object was removed. + list_obj_out = self.get_daos_command().container_list_objects( + pool=self.pool.identifier, cont=self.container.uuid) + obj_list = list_obj_out["response"] + self.log.info("Object list (after) = %s", obj_list) + + expected_len = len(ioreqs) - 1 + if obj_list: + actual_len = len(obj_list) + else: + actual_len = 0 + if actual_len != expected_len: + msg = ("Unexpected number of objects after ddb rm! " + "Expected = {}; Actual = {}").format(expected_len, actual_len) + errors.append(msg) + + self.log.info("##### Errors #####") + report_errors(test=self, errors=errors) + self.log.info("##################") + + def test_recovery_ddb_load(self): + """Test ddb value_load. + + 1. Create a pool and a container. + 2. Insert one object with one dkey with the API. + 3. Stop the server to use ddb. + 4. Find the vos file name. e.g., /mnt/daos0//vos-0. + 5. Load new data into [0]/[0]/[0]/[0] + 6. Restart the server. + 7. Reset the object, container, and pool to use the API. + 8. Verify the data in the akey with single_fetch(). 
+ + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_load + """ + # 1. Create a pool and a container. + self.add_pool(connect=True) + self.add_container(pool=self.pool) + + # 2. Insert one object with one dkey with API. + obj_dataset = insert_objects( + context=self.context, container=self.container, object_count=1, + dkey_count=1, akey_count=1, base_dkey=self.random_dkey, + base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + dkeys_inserted = obj_dataset[1] + akeys_inserted = obj_dataset[2] + data_list = obj_dataset[3] + + # For debugging/reference, call single_fetch and get the data just inserted. + # Pass in size + 1 to single_fetch to avoid the no-space error. + data_size = len(data_list[0]) + 1 + data = ioreqs[0].single_fetch( + dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) + self.log.info("data (before) = %s", data.value.decode('utf-8')) + + # 3. Stop the server to use ddb. + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 4. Find the vos file name. + vos_file = self.get_vos_file_path(pool=self.pool) + host = NodeSet(self.hostlist_servers[0]) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + ddb_command = DdbCommand( + server_host=host, path=self.bin, mount_point=scm_mount, + pool_uuid=self.pool.uuid, vos_file=vos_file) + + # 5. Load new data into [0]/[0]/[0]/[0] + # Create a file in test node. + load_file_path = os.path.join(self.test_dir, "new_data.txt") + new_data = "New akey data 0123456789" + with open(load_file_path, "w") as file: + file.write(new_data) + + # Copy the created file to server node. + try: + distribute_files( + hosts=host, source=load_file_path, destination=load_file_path, + mkdir=False) + except DaosTestError as error: + raise CommandFailure( + "ERROR: Copying new_data.txt to {0}: {1}".format(host, error)) \ + from error + + # The file with the new data is ready. 
Run ddb load. + ddb_command.value_load(component_path="[0]/[0]/[0]/[0]", load_file_path=load_file_path) + + # 6. Restart the server. + dmg_command.system_start() + + # 7. Reset the object, container, and pool to use the API after server restart. + ioreqs[0].obj.close() + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + ioreqs[0].obj.open() + + # 8. Verify the data in the akey with single_fetch(). + data_size = len(new_data) + 1 + data = ioreqs[0].single_fetch( + dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) + actual_data = data.value.decode('utf-8') + self.log.info("data (after) = %s", actual_data) + + errors = [] + if new_data != actual_data: + msg = "ddb load failed! Expected = {}; Actual = {}".format( + new_data, actual_data) + errors.append(msg) + + self.log.info("##### Errors #####") + report_errors(test=self, errors=errors) + self.log.info("##################") + + def test_recovery_ddb_dump_value(self): + """Test ddb dump_value. + + 1. Create a pool and a container. + 2. Insert one object with one dkey with API. + 3. Stop the server to use ddb. + 4. Find the vos file name. e.g., /mnt/daos0//vos-0. + 5. Dump the two akeys to files. + 6. Verify the content of the files. + 7. Restart the server for the cleanup. + 8. Reset the object, container, and pool to prepare for the cleanup. + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_dump_value + """ + # 1. Create a pool and a container. + self.add_pool(connect=True) + self.add_container(pool=self.pool) + + # 2. Insert one object with one dkey with API. + obj_dataset = insert_objects( + context=self.context, container=self.container, object_count=1, + dkey_count=1, akey_count=2, base_dkey=self.random_dkey, + base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + data_list = obj_dataset[3] + + # 3. Stop the server to use ddb. 
+ dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 4. Find the vos file name. + vos_file = self.get_vos_file_path(pool=self.pool) + host = NodeSet(self.hostlist_servers[0]) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + ddb_command = DdbCommand( + server_host=host, path=self.bin, mount_point=scm_mount, + pool_uuid=self.pool.uuid, vos_file=vos_file) + + # 5. Dump the two akeys to files. + akey1_file_path = os.path.join(self.test_dir, "akey1.txt") + ddb_command.value_dump( + component_path="[0]/[0]/[0]/[0]", out_file_path=akey1_file_path) + akey2_file_path = os.path.join(self.test_dir, "akey2.txt") + ddb_command.value_dump( + component_path="[0]/[0]/[0]/[1]", out_file_path=akey2_file_path) + + # Copy them from remote server node to local test node. + copy_remote_to_local( + remote_file_path=akey1_file_path, test_dir=self.test_dir, + remote=self.hostlist_servers[0]) + copy_remote_to_local( + remote_file_path=akey2_file_path, test_dir=self.test_dir, + remote=self.hostlist_servers[0]) + + # 6. Verify the content of the files. + actual_akey1_data = None + with open(akey1_file_path, "r") as file: + actual_akey1_data = file.readlines()[0] + actual_akey2_data = None + with open(akey2_file_path, "r") as file: + actual_akey2_data = file.readlines()[0] + + errors = [] + str_data_list = [] + # Convert the data to string. + for data in data_list: + str_data_list.append(data.value.decode("utf-8")) + # Verify that we were able to obtain the data and akey1 and akey2 aren't the same. + if actual_akey1_data is None or actual_akey2_data is None or \ + actual_akey1_data == actual_akey2_data: + msg = ("Invalid dumped value! Dumped akey1 data = {}; " + "Dumped akey2 data = {}").format(actual_akey1_data, actual_akey2_data) + errors.append(msg) + # Verify that the data we obtained with ddb are the ones we wrote. The order isn't + # deterministic, so check with "in". 
+ if actual_akey1_data not in str_data_list or \ + actual_akey2_data not in str_data_list: + msg = ("Unexpected dumped value! Dumped akey data 1 = {}; " + "Dumped akey data 2 = {}; Expected data list = {}").format( + actual_akey1_data, actual_akey2_data, str_data_list) + errors.append(msg) + + # 7. Restart the server for the cleanup. + dmg_command.system_start() + + # 8. Reset the object, container, and pool to prepare for the cleanup. + ioreqs[0].obj.close() + self.container.close() + self.pool.disconnect() + self.pool.connect() + self.container.open() + ioreqs[0].obj.open() + + self.log.info("##### Errors #####") + report_errors(test=self, errors=errors) + self.log.info("##################") diff --git a/src/tests/ftest/recovery/ddb.yaml b/src/tests/ftest/recovery/ddb.yaml new file mode 100644 index 00000000000..c794b7928c5 --- /dev/null +++ b/src/tests/ftest/recovery/ddb.yaml @@ -0,0 +1,27 @@ +hosts: + test_servers: 1 + +timeout: 1800 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 1 + storage: auto + system_ram_reserved: 1 + +# In CI, all tests in ddb.py are run in a single launch.py execution. In that case, the +# test_dir (/var/tmp/daos_testing/) in the server node will not be created +# for each test if "start_servers_once: False" isn't set. test_recovery_ddb_load() needs this +# directory, so we need to set it. +setup: + start_servers_once: False + +pool: + control_method: dmg + scm_size: 1G + +container: + control_method: API diff --git a/src/tests/ftest/recovery/ms_membership.py b/src/tests/ftest/recovery/ms_membership.py new file mode 100644 index 00000000000..f0fdb7652fe --- /dev/null +++ b/src/tests/ftest/recovery/ms_membership.py @@ -0,0 +1,132 @@ +""" + (C) Copyright 2023 Intel Corporation. 
+ + SPDX-License-Identifier: BSD-2-Clause-Patent +""" + +from apricot import TestWithServers +from exception_utils import CommandFailure +from general_utils import report_errors + + +class MSMembershipTest(TestWithServers): + """Test Pass 0: Management Service & Membership + + :avocado: recursive + """ + + def test_checker_on_admin_excluded(self): + """Test checker can only be run when the system status is AdminExcluded. + + 1. Call dmg check enable. + 2. Stop rank 1. + 3. Call dmg system query and check that status of at least one rank is not + "checkerstarted". We verify that the new checkerstarted state is properly changed. + 4. Call dmg check start. It should show error because the stopped rank is not at + CheckerStarted state. + 5. Call dmg check query. It should show error because the stopped rank is not at + CheckerStarted state. + 6. Call dmg check disable. It should work. + 7. Restart the stopped rank for cleanup. + + Jira ID: DAOS-11703 + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery,ms_membership + :avocado: tags=MSMembershipTest,test_checker_on_admin_excluded + """ + dmg_command = self.get_dmg_command() + + # 1. Call dmg check enable. + dmg_command.check_enable() + + # 2. Stop rank 1. + dmg_command.system_stop(ranks="1") + + # 3. Call dmg system query and check that status of at least one rank is not + # "checkerstarted". We verify that the new checkerstarted state is properly + # changed. + query_out = dmg_command.system_query() + not_checkerstarted_found = False + for member in query_out["response"]["members"]: + if member["state"] != "checkerstarted": + not_checkerstarted_found = True + break + if not not_checkerstarted_found: + # All rank's status is "checkerstarted". + self.fail("All rank's status is checkerstarted!") + + # 4. Call dmg check start. It should show error because the stopped rank is not at + # CheckerStarted state. 
+ try: + dmg_command.check_start() + except CommandFailure as error: + self.log.info("dmg check start is expected to fail. Error: %s", error) + + # 5. Call dmg check query. It should show error because the stopped rank is not at + # CheckerStarted state. + try: + dmg_command.check_query() + except CommandFailure as error: + self.log.info("dmg check query is expected to fail. Error: %s", error) + + # 6. Call dmg check disable. It should work. + dmg_command.check_disable() + + # 7. Restart the stopped rank for cleanup. + self.log.info("Restart stopped rank for cleanup.") + dmg_command.system_start(ranks="1") + + def test_enable_disable_admin_excluded(self): + """Test dmg system exclude and clear-exclude. + + Test admin can enable and disable the rank state to AdminExcluded when the rank is + down. + + 1. Stop rank 1. + 2. Set rank 1 to AdminExcluded by calling dmg system exclude --ranks=1 and verify + the state has been changed. + 3. Verify that the checker can be run with AdminExcluded state by calling enable, + start, query, and disable. Verify that none of the commands returns error. + 4. Disable AdminExcluded of rank 1 by calling dmg system clear-exclude --ranks=1 + and verify the state has been changed. + 5. Servers haven't been started, so update the expected state of rank 0 for + cleanup. + + Jira ID: DAOS-11704 + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery,ms_membership + :avocado: tags=MSMembershipTest,test_enable_disable_admin_excluded + """ + errors = [] + dmg_command = self.get_dmg_command() + + # 1. Stop rank 1. + dmg_command.system_stop(ranks="1") + + # 2. Set rank 1 to AdminExcluded and verify the state has been changed. + self.server_managers[-1].system_exclude(ranks=[1]) + + # 3. Verify that the checker can be run with AdminExcluded state. 
+ try: + dmg_command.check_enable() + dmg_command.check_start() + dmg_command.check_query() + # We need to start after calling dmg system clear-exclude, otherwise the start + # command will hang. + dmg_command.check_disable(start=False) + except CommandFailure as error: + msg = f"dmg check command failed! {error}" + errors.append(msg) + + # 4. Disable AdminExcluded of rank 1 and verify the state has been changed. + self.server_managers[-1].system_clear_exclude(ranks=[1]) + + # 5. Servers haven't been started, so update the expected state of rank 0 for + # cleanup. + self.server_managers[-1].update_expected_states(0, ['stopped']) + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/ms_membership.yaml b/src/tests/ftest/recovery/ms_membership.yaml new file mode 100644 index 00000000000..b698773d1bf --- /dev/null +++ b/src/tests/ftest/recovery/ms_membership.yaml @@ -0,0 +1,13 @@ +hosts: + test_servers: 2 + +timeout: 180 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 1 + storage: auto + system_ram_reserved: 1 diff --git a/src/tests/ftest/recovery/pool_cleanup.py b/src/tests/ftest/recovery/pool_cleanup.py new file mode 100644 index 00000000000..c2b1effb73a --- /dev/null +++ b/src/tests/ftest/recovery/pool_cleanup.py @@ -0,0 +1,244 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import time + +from general_utils import report_errors +from recovery_test_base import RecoveryTestBase + + +class PoolCleanupTest(RecoveryTestBase): + """Test Pass 3: Pool Cleanup + + :avocado: recursive + """ + + def test_corrupt_label_ms(self): + """Test corrupt label in MS. + + 1. Create a pool. + 2. Mangle the label in the MS (Mangle the copy of the PS metadata exists in the + MS). + 3. Check that the label in MS is corrupted with -fault added. + 4. Check that the label in PS is not corrupted. + 5. Stop the servers and enable the checker. + 6. 
Set the policy to --all-interactive. + 7. Start the checker and query the checker until the fault is detected. + 8. Repair with option 1; Trust PS pool label. + 9. Query the checker until the fault is repaired. + 10. Call dmg check disable and restart the servers. + 11. Wait for all ranks to join. + 12. Verify that the corrupted label in MS is fixed. + + Jira ID: DAOS-11741 + + :avocado: tags=all,pr + :avocado: tags=vm + :avocado: tags=recovery,pool_cleanup + :avocado: tags=PoolCleanupTest,test_corrupt_label_ms + """ + # 1. Create a pool. + self.log_step("Create a pool") + pool = self.get_pool(connect=False) + + # 2. Mangle the label in the MS. + self.log_step("Mangle the label in the MS") + dmg_command = self.get_dmg_command() + dmg_command.faults_mgmt_svc_pool( + pool=pool.identifier, checker_report_class="CIC_POOL_BAD_LABEL") + + # 3. Check that the label in MS is corrupted with -fault added. + self.log_step("Check that the label in MS is corrupted with -fault added") + pool_labels = dmg_command.get_pool_list_labels() + errors = [] + expected_label = pool.identifier + "-fault" + if pool_labels[0] != expected_label: + msg = (f"Unexpected label in MS! Expected = {expected_label}; Actual = " + f"{pool_labels[0]}") + errors.append(msg) + + # 4. Check that the label in PS isn’t corrupted. + self.log_step("Check that the label in PS isn’t corrupted.") + # Add -fault to the label and use that to call dmg pool get-prop. + orig_identifier = pool.identifier + pool.label.update(orig_identifier + "-fault") + ps_label = pool.get_property(prop_name="label") + if ps_label != orig_identifier: + msg = (f"Unexpected label in PS! Expected = {orig_identifier}; Actual = " + f"{ps_label}") + errors.append(msg) + # Restore the label to the original. + pool.label.update(orig_identifier) + + # 5. Stop the servers and enable the checker. + self.log_step("Stop the servers and enable the checker.") + dmg_command.check_enable() + + # 6. Set the policy to --all-interactive. 
+ self.log_step("Set the policy to --all-interactive.") + dmg_command.check_set_policy(all_interactive=True) + + # 7. Start the checker and query the checker until the fault is detected. + self.log_step("Start and query the checker until the fault is detected.") + seq_num = None + # Start checker. + dmg_command.check_start() + # Query the checker until the label inconsistency is detected. + for _ in range(8): + check_query_out = dmg_command.check_query() + # Status is INIT before starting the checker. + if check_query_out["response"]["status"] == "RUNNING": + seq_num = check_query_out["response"]["reports"][0]["seq"] + break + time.sleep(5) + if not seq_num: + self.fail("Checker didn't detect any fault!") + + # 8. Repair with option 1; Trust PS pool label. + self.log_step("Repair with option 1; Trust PS pool label (in JSON output).") + dmg_command.check_repair(seq_num=seq_num, action=1) + + # 9. Query the checker until the fault is repaired. + self.log_step("Query the checker until the fault is repaired.") + repair_report = self.wait_for_check_complete()[0] + + # Verify that the repair report has expected message "Update the MS label". + action_message = repair_report["act_msgs"][0] + exp_msg = "Update the MS label" + # Do not reset errors here: mismatches recorded in steps 3 and 4 must still be reported. + if exp_msg not in action_message: + errors.append(f"{exp_msg} not in {action_message}!") + + # 10. Call dmg check disable and restart the servers. + self.log_step("Call dmg check disable and restart the servers.") + dmg_command.check_disable() + + # 11. Wait for all ranks to join. + self.log_step("Wait for all ranks to join.") + rank_list = self.server_managers[0].get_host_ranks(hosts=self.hostlist_servers) + failed_ranks = self.server_managers[0].check_rank_state( + ranks=rank_list, valid_states="joined", max_checks=10) + if failed_ranks: + errors.append( + f"Following ranks didn't become joined after restart! {failed_ranks}") + + # 12. Verify that the corrupted label in MS is fixed.
+ self.log_step("Verify that the corrupted label in MS is fixed.") + pool_labels = dmg_command.get_pool_list_labels() + if pool_labels[0] != pool.identifier: + msg = (f"Label in MS hasn't been repaired! Expected = " + f"{pool.identifier}; Actual = {pool_labels[0]}") + errors.append(msg) + + report_errors(test=self, errors=errors) + + def test_corrupt_label_ps(self): + """Test corrupt label in PS. + + 1. Create a pool. + 2. Mangle the label in the PS metadata. + 3. Call dmg pool get-prop TestLabel_1 label and verify that the label value is + TestLabel_1-fault. + 4. Stop servers and enable the checker. + 5. Set checker policy to --all-interactive. + 6. Start the checker and query the checker until the fault is detected. + 7. Repair with option 0; Trust MS pool label. + 8. Query the checker until the fault is repaired. + 9. Disable the checker and restart servers. + 10. Wait for all ranks to join. + 11. Call dmg pool get-prop TestLabel_1 label and verify that the original label is + restored. + + Jira ID: DAOS-11742 + + :avocado: tags=all,pr + :avocado: tags=vm + :avocado: tags=recovery,pool_cleanup + :avocado: tags=PoolCleanupTest,test_corrupt_label_ps + """ + # 1. Create a pool. + self.log_step("Create a pool") + pool = self.get_pool(connect=False) + + # 2. Mangle the label in the PS. + self.log_step("Mangle the label in the PS") + dmg_command = self.get_dmg_command() + dmg_command.faults_pool_svc( + pool=pool.identifier, checker_report_class="CIC_POOL_BAD_LABEL") + + # 3. Call dmg pool get-prop TestLabel_1 label and verify that the label value is + # TestLabel_1-fault. + self.log_step("Check that the label in PS is corrupted with -fault added") + ps_label = pool.get_property(prop_name="label") + errors = [] + expected_label = pool.identifier + "-fault" + if ps_label != expected_label: + msg = (f"Unexpected label in PS! Expected = {expected_label}; Actual = " + f"{ps_label}") + errors.append(msg) + + # 4. Stop the servers and enable the checker.
+ self.log_step("Stop the servers and enable the checker.") + dmg_command.check_enable() + + # 5. Set the policy to --all-interactive. + self.log_step("Set the policy to --all-interactive.") + dmg_command.check_set_policy(all_interactive=True) + + # 6. Start the checker and query the checker until the fault is detected. + self.log_step("Start and query the checker until the fault is detected.") + seq_num = None + # Start checker. + dmg_command.check_start() + # Query the checker until the label inconsistency is detected. + for _ in range(8): + check_query_out = dmg_command.check_query() + # Status is INIT before starting the checker. + if check_query_out["response"]["status"] == "RUNNING": + seq_num = check_query_out["response"]["reports"][0]["seq"] + break + time.sleep(5) + if not seq_num: + self.fail("Checker didn't detect any fault!") + + # 7. Repair with option 0; Trust MS pool label. + self.log_step("Repair with option 0; Trust MS pool label.") + dmg_command.check_repair(seq_num=seq_num, action=0) + + # 8. Query the checker until the fault is repaired. + self.log_step("Query the checker until the fault is repaired.") + repair_report = self.wait_for_check_complete()[0] + + # Verify that the repair report has expected message "Reset the pool property + # using the MS label". + action_message = repair_report["act_msgs"][0] + exp_msg = "Reset the pool property using the MS label" + # Do not reset errors here: a mismatch recorded in step 3 must still be reported. + if exp_msg not in action_message: + errors.append(f"{exp_msg} not in {action_message}!") + + # 9. Call dmg check disable and restart the servers. + self.log_step("Call dmg check disable and restart the servers.") + dmg_command.check_disable() + + # 10. Wait for all ranks to join.
+ self.log_step("Wait for all ranks to join.") + rank_list = self.server_managers[0].get_host_ranks(hosts=self.hostlist_servers) + failed_ranks = self.server_managers[0].check_rank_state( + ranks=rank_list, valid_states="joined", max_checks=10) + if failed_ranks: + errors.append( + f"Following ranks didn't become joined after restart! {failed_ranks}") + + # 11. Call dmg pool get-prop TestPool_1 label and verify that the original label + # is restored in PS. + self.log_step("Verify that the corrupted label in PS is fixed.") + ps_label = pool.get_property(prop_name="label") + if ps_label != pool.identifier: + msg = (f"Label in PS hasn't been repaired! Expected = " + f"{pool.identifier}; Actual = {ps_label}") + errors.append(msg) + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/pool_cleanup.yaml b/src/tests/ftest/recovery/pool_cleanup.yaml new file mode 100644 index 00000000000..6e8fd24b5bc --- /dev/null +++ b/src/tests/ftest/recovery/pool_cleanup.yaml @@ -0,0 +1,20 @@ +hosts: + test_servers: 1 + +timeout: 360 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +pool: + size: 5G diff --git a/src/tests/ftest/recovery/pool_list_consolidation.py b/src/tests/ftest/recovery/pool_list_consolidation.py new file mode 100644 index 00000000000..e3e4f805c6c --- /dev/null +++ b/src/tests/ftest/recovery/pool_list_consolidation.py @@ -0,0 +1,391 @@ +""" + (C) Copyright 2023 Intel Corporation. 
+ + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import time + +from avocado.core.exceptions import TestFail +from ClusterShell.NodeSet import NodeSet +from general_utils import check_file_exists, pcmd, report_errors +from recovery_test_base import RecoveryTestBase + + +class PoolListConsolidationTest(RecoveryTestBase): + """Test Pass 1: Pool List Consolidation + + :avocado: recursive + """ + + def chk_dist_checker(self, inconsistency, policies=None): + """Run DAOS checker with kinds of options. + + 1. Enable check mode. + 2. Run checker under dry-run mode. + 3. Set repair policy as interaction. + 4. Run checker under auto mode and verify that it detects inconsistency. + 5. Reset repair policy as default. + 6. Run checker under regular mode, that will repair the inconsistency. + 7. Disable check mode. + + Jira ID: DAOS-13047 + + Args: + inconsistency (str): The message string for the inconsistency to be detected. + policies (str, optional): Policies used during dmg check start. Defaults to None. + + Returns: + list: Errors. + + """ + errors = [] + + dmg_command = self.get_dmg_command() + # 1. Enable check mode. + dmg_command.check_enable() + + # 2.1 Start checker with "dry-run" option. + # that will detect the inconsistency but not really repair it. + dmg_command.check_start(dry_run=True) + + # 2.2 Query the checker. + query_msg = self.wait_for_check_complete()[0]["msg"] + + # 2.3 Verify that the checker detected the inconsistency. + if inconsistency not in query_msg: + errors.append( + "Checker didn't detect the {} (1)! msg = {}".format(inconsistency, query_msg)) + dmg_command.check_disable() + return errors + + # 3. Set the repair policy to interaction. + dmg_command.check_set_policy(all_interactive=True) + + # 4.1 start checker with "auto" option, + # that will detect the inconsistency but skip interaction. + dmg_command.check_start(auto="on") + + # 4.2. Query the checker. 
+ query_msg = self.wait_for_check_complete()[0]["msg"] + + # 4.3 Verify that the checker detected the inconsistency. + if inconsistency not in query_msg: + errors.append( + "Checker didn't detect the {} (2)! msg = {}".format(inconsistency, query_msg)) + dmg_command.check_disable() + return errors + + # 5. Reset the repair policy to default. + dmg_command.check_set_policy(reset_defaults=True) + + # 6.1 Start check with auto=off, + # that will find the inconsistency and repair it. + dmg_command.check_start(auto="off", policies=policies) + + # 6.2 Query the checker. + query_msg = self.wait_for_check_complete()[0]["msg"] + + # 6.3 Verify that the checker detected the inconsistency. + if inconsistency not in query_msg: + errors.append( + "Checker didn't detect the {} (3)! msg = {}".format(inconsistency, query_msg)) + + # 7. Disable check mode. + dmg_command.check_disable() + + return errors + + def test_dangling_pool(self): + """Test dangling pool. + + 1. Create a pool. + 2. Remove the pool from the pool shards on engine by calling: + dmg faults pool-svc CIC_POOL_NONEXIST_ON_ENGINE + 3. Show dangling pool entry by calling: + dmg pool list --no-query + 4. Run DAOS checker under kinds of mode. + 5. Verify that the dangling pool was removed. Call dmg pool list and it should + return an empty list. + + Jira ID: DAOS-11711 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_list_consolidation + :avocado: tags=PoolListConsolidationTest,test_dangling_pool + """ + # 1. Create a pool. + self.pool = self.get_pool(connect=False) + + # 2. Remove the pool shards on engine. + dmg_command = self.get_dmg_command() + dmg_command.faults_pool_svc( + pool=self.pool.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_ENGINE") + + # 3. Show dangling pool entry. + pools = dmg_command.get_pool_list_labels(no_query=True) + if self.pool.identifier not in pools: + self.fail("Dangling pool was not found!") + + errors = [] + # 4. 
Run DAOS checker under kinds of mode. + errors = self.chk_dist_checker(inconsistency="dangling pool") + + # 5. Verify that the dangling pool was removed. + pools = dmg_command.get_pool_list_labels() + if pools: + errors.append(f"Dangling pool was not removed! {pools}") + + # Don't try to destroy the pool during tearDown. + self.pool.skip_cleanup() + + report_errors(test=self, errors=errors) + + def run_checker_on_orphan_pool(self, policies=None): + """Run step 1 to 4 of the orphan pool tests. + + 1. Create a pool. + 2. Remove the PS entry on management service (MS) by calling: + dmg faults mgmt-svc pool CIC_POOL_NONEXIST_ON_MS + 3. At this point, MS doesn't recognize any pool, but it exists on engine (orphan + pool). Call dmg pool list and verify that it doesn't return any pool. + 4. Run DAOS checker under kinds of mode. + + Args: + policies (str): Policies used during dmg check start. Defaults to None. + + Returns: + list: Errors. + + """ + # 1. Create a pool. + self.pool = self.get_pool(connect=False) + + # 2. Remove the PS entry on management service (MS). + dmg_command = self.get_dmg_command() + dmg_command.faults_mgmt_svc_pool( + pool=self.pool.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_MS") + + # 3. At this point, MS doesn't recognize any pool, but it exists on engine. + pools = dmg_command.get_pool_list_labels() + if pools: + msg = f"MS recognized a pool after injecting CIC_POOL_NONEXIST_ON_MS! {pools}" + self.fail(msg) + + errors = [] + # 4. Run DAOS checker under kinds of mode. + errors = self.chk_dist_checker( + inconsistency="orphan pool", policies=policies) + + return errors + + def verify_pool_dir_removed(self, errors): + """Verify pool directory was removed from mount point of all nodes. + + Args: + errors (list): Error list. + + Returns: + list: Error list. 
+ + """ + hosts = list(set(self.server_managers[0].ranks.values())) + nodeset_hosts = NodeSet.fromlist(hosts) + pool_path = f"/mnt/daos0/{self.pool.uuid.lower()}" + check_out = check_file_exists(hosts=nodeset_hosts, filename=pool_path) + if check_out[0]: + msg = f"Pool path still exists! Node without pool path = {check_out[1]}" + errors.append(msg) + + return errors + + def test_orphan_pool_trust_ps(self): + """Test orphan pool with trust PS (default option). + + Jira ID: DAOS-11712 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_list_consolidation + :avocado: tags=PoolListConsolidationTest,test_orphan_pool_trust_ps + """ + errors = [] + # 1. Run DAOS checker under kinds of mode with trusting PS (by default). + errors = self.run_checker_on_orphan_pool() + + # 2. Verify that the orphan pool was reconstructed. + dmg_command = self.get_dmg_command() + pools = dmg_command.get_pool_list_labels() + if self.pool.identifier not in pools: + errors.append(f"Orphan pool was not reconstructed! Pools = {pools}") + + report_errors(test=self, errors=errors) + + def test_orphan_pool_trust_ms(self): + """Test orphan pool with trust MS. + + Jira ID: DAOS-11712 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_list_consolidation + :avocado: tags=PoolListConsolidationTest,test_orphan_pool_trust_ms + """ + errors = [] + # 1. Run DAOS checker under kinds of mode with trusting MS. + errors = self.run_checker_on_orphan_pool( + policies="POOL_NONEXIST_ON_MS:CIA_TRUST_MS") + + # 2. Verify that the orphan pool was destroyed. + dmg_command = self.get_dmg_command() + pools = dmg_command.get_pool_list_labels() + if pools: + errors.append(f"Orphan pool was not destroyed! Pools = {pools}") + + # 3. Verify that the pool directory is removed from the mount point. + errors = self.verify_pool_dir_removed(errors=errors) + + # Don't try to destroy the pool during tearDown. 
+ self.pool.skip_cleanup() + + report_errors(test=self, errors=errors) + + def test_lost_majority_ps_replicas(self): + """Test lost the majority of PS replicas. + + 1. Create a pool with --nsvc=3. Rank 0, 1, and 2 will be pool service replicas. + 2. Stop servers. + 3. Remove /mnt/daos//rdb-pool from rank 0 and 2. + 4. Start servers. + 5. Run DAOS checker under kinds of mode. + 6. Try creating a container. The pool can be started now, so create should succeed. + 7. Show that rdb-pool are recovered. i.e., at least three out of four ranks + should have rdb-pool. + + Jira ID: DAOS-12029 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_list_consolidation + :avocado: tags=PoolListConsolidationTest,test_lost_majority_ps_replicas + """ + # 1. Create a pool with --nsvc=3. + self.pool = self.get_pool(svcn=3) + + # 2. Stop servers. + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 3. Remove /mnt/daos//rdb-pool from two ranks. + rdb_pool_path = f"/mnt/daos0/{self.pool.uuid.lower()}/rdb-pool" + command = f"sudo rm /mnt/daos0/{self.pool.uuid.lower()}/rdb-pool" + hosts = list(set(self.server_managers[0].ranks.values())) + count = 0 + for host in hosts: + node = NodeSet(host) + check_out = check_file_exists(hosts=node, filename=rdb_pool_path, sudo=True) + if check_out[0]: + pcmd(hosts=node, command=command) + self.log.info("rm rdb-pool from %s", str(node)) + count += 1 + if count > 1: + break + + # 4. Start servers. + dmg_command.system_start() + + errors = [] + # 5. Run DAOS checker under kinds of mode. + errors = self.chk_dist_checker( + inconsistency="corrupted pool without quorum") + + # 6. Try creating a container. It should succeed. + cont_create_success = False + for _ in range(5): + time.sleep(5) + try: + self.container = self.get_container(pool=self.pool) + cont_create_success = True + break + except TestFail as error: + msg = (f"## Container create failed after running checker! 
" + f"error = {error}") + self.log.debug(msg) + + if not cont_create_success: + errors.append("Container create failed after running checker!") + + # 7. Show that rdb-pool are recovered. i.e., at least three out of four ranks + # should have rdb-pool. + hosts = list(set(self.server_managers[0].ranks.values())) + count = 0 + for host in hosts: + node = NodeSet(host) + check_out = check_file_exists(hosts=node, filename=rdb_pool_path, sudo=True) + if check_out[0]: + count += 1 + self.log.info("rdb-pool found at %s", str(node)) + + self.log.info("rdb-pool count = %d", count) + if count < len(hosts) - 1: + errors.append(f"Not enough rdb-pool has been recovered! - {count} ranks") + + report_errors(test=self, errors=errors) + + def test_lost_all_rdb(self): + """Remove rdb-pool from all mount point from all nodes. Now the pool cannot be + recovered, so checker should remove it from both MS and engine. + + 1. Create a pool. + 2. Stop servers. + 3. Remove /mnt/daos0//rdb-pool from all ranks. + 4. Start servers. + 5. Run DAOS checker under kinds of mode. + 6. Check that the pool does not appear with dmg pool list. + 7. Verify that the pool directory was removed from the mount point. + + Jira ID: DAOS-12067 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_list_consolidation + :avocado: tags=PoolListConsolidationTest,test_lost_all_rdb + """ + # 1. Create a pool. + self.pool = self.get_pool() + + # 2. Stop servers. + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 3. Remove /mnt/daos//rdb-pool from all ranks. + hosts = list(set(self.server_managers[0].ranks.values())) + nodeset_hosts = NodeSet.fromlist(hosts) + command = f"sudo rm /mnt/daos0/{self.pool.uuid.lower()}/rdb-pool" + remove_result = pcmd(hosts=nodeset_hosts, command=command) + success_nodes = remove_result[0] + if nodeset_hosts != success_nodes: + msg = (f"Failed to remove rdb-pool! 
All = {nodeset_hosts}, " + f"Success = {success_nodes}") + self.fail(msg) + + # 4. Start servers. + dmg_command.system_start() + + errors = [] + # 5. Run DAOS checker under kinds of mode. + errors = self.chk_dist_checker( + inconsistency="corrupted pool without quorum") + + # 6. Check that the pool does not appear with dmg pool list. + pools = dmg_command.get_pool_list_all() + if pools: + errors.append(f"Pool still exists after running checker! {pools}") + + # 7. Verify that the pool directory was removed from the mount point. + errors = self.verify_pool_dir_removed(errors=errors) + + # Don't try to destroy the pool during tearDown. + self.pool.skip_cleanup() + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/pool_list_consolidation.yaml b/src/tests/ftest/recovery/pool_list_consolidation.yaml new file mode 100644 index 00000000000..6b3ba51aa1b --- /dev/null +++ b/src/tests/ftest/recovery/pool_list_consolidation.yaml @@ -0,0 +1,23 @@ +hosts: + test_servers: 4 + +timeout: 180 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + nr_xs_helpers: 1 + storage: auto + +# We need to restart servers and clean up after each test because we manually corrupt the +# pool directory. The tests will have unpredictable behavior if we don't. +setup: + start_servers_once: False + +pool: + size: 60G + +container: + control_method: daos diff --git a/src/tests/ftest/recovery/pool_membership.py b/src/tests/ftest/recovery/pool_membership.py new file mode 100644 index 00000000000..342282225a0 --- /dev/null +++ b/src/tests/ftest/recovery/pool_membership.py @@ -0,0 +1,392 @@ +""" + (C) Copyright 2023 Intel Corporation. 
+ + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import time + +from ClusterShell.NodeSet import NodeSet +from general_utils import report_errors +from ior_test_base import IorTestBase +from run_utils import run_remote + + +class PoolMembershipTest(IorTestBase): + """Test Pass 2: Pool Membership + + :avocado: recursive + """ + + def get_rank_to_free(self): + """Get the free space for each engine rank. + + Call dmg storage query usage for all servers and return free space (avail_bytes) + for each rank as dictionary. + + Returns: + dict: Key is rank and value is free space of the rank in bytes as int. + + """ + storage_query_out = self.server_managers[0].dmg.storage_query_usage() + rank_to_free = {} + storage_list = storage_query_out["response"]["HostStorage"] + for storage_hash in storage_list.values(): + for scm_namespace in storage_hash["storage"]["scm_namespaces"]: + rank = scm_namespace["mount"]["rank"] + free = scm_namespace["mount"]["avail_bytes"] + rank_to_free[rank] = free + + return rank_to_free + + def wait_for_check_complete(self): + """Repeatedly call dmg check query until status becomes COMPLETED. + + If the status doesn't become COMPLETED, fail the test. + + Returns: + list: List of repair reports. + + """ + repair_reports = None + for _ in range(8): + check_query_out = self.get_dmg_command().check_query() + if check_query_out["response"]["status"] == "COMPLETED": + repair_reports = check_query_out["response"]["reports"] + break + time.sleep(5) + + if not repair_reports: + self.fail("Checker didn't detect or repair any inconsistency!") + + return repair_reports + + def test_orphan_pool_shard(self): + """Test orphan pool shard. + + 1. Create a pool on rank 0. + 2. Call dmg storage query usage to store the default space utilization. + 3. Prepare to copy the pool path before stopping servers. + 4. Stop servers. + 5. Copy /mnt/daos?/ from the engine where we created the pool to + another engine where we didn’t create. 
Destination engine is in different node. + 6. Enable and start the checker. + 7. Query the checker and verify that the issue was fixed. + i.e., Current status is COMPLETED. + 8. Disable the checker. + 9. Call dmg storage query usage to verify that the pool usage is back to the + original value. + + Jira ID: DAOS-11734 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_membership + :avocado: tags=PoolMembershipTest,test_orphan_pool_shard + """ + # 1. Create a pool. + self.log_step("Creating a pool (dmg pool create)") + pool = self.get_pool(connect=False, target_list="0") + + # 2. Call dmg storage query usage to store the default space utilization. + self.log_step("Collecting free space for each rank (dmg storage query usage)") + rank_to_free_orig = self.get_rank_to_free() + self.log.info("rank_to_free_orig = %s", rank_to_free_orig) + + # 3. Prepare to copy the pool path before stopping servers. + + # In order to copy the pool directory without password, there are several things + # to determine and set up. + + # 3-1. Determine source host and destination host. Source host is where rank 0 is. + # Destination host is the other host. + self.log_step("Determine source host and destination host.") + src_host = dst_host = NodeSet(self.server_managers[0].get_host(0)) + rank = 1 + while rank < self.server_managers[0].engines and dst_host == src_host: + dst_host = NodeSet(self.server_managers[0].get_host(rank)) + rank += 1 + self.log.info("src_host = %s; dst_host = %s", src_host, dst_host) + + # 3-2. Determine source and destination mount point. First, find the source mount + # point that maps to rank 0. Then use the same mount point for the destination. + # This way, we can handle the case where the mount point name is changed in the + # future. At the same time, determine the destination rank, which is where + # dst_mount is mapped. 
+ src_mount = None + dst_mount = None + dst_rank = None + dmg_command = self.get_dmg_command() + storage_query_out = dmg_command.storage_query_usage() + hash_dict = storage_query_out["response"]["HostStorage"] + for storage_dict in hash_dict.values(): + if str(src_host) in storage_dict["hosts"]: + # Determine source mount point that maps to rank 0. + for scm_namespace in storage_dict["storage"]["scm_namespaces"]: + if scm_namespace["mount"]["rank"] == 0: + # For dst_mount, use the same mount point as source. + dst_mount = src_mount = scm_namespace["mount"]["path"] + for storage_dict in hash_dict.values(): + if str(dst_host) in storage_dict["hosts"]: + # Determine destination rank that maps to dst_mount. + for scm_namespace in storage_dict["storage"]["scm_namespaces"]: + if scm_namespace["mount"]["path"] == dst_mount: + dst_rank = scm_namespace["mount"]["rank"] + + # 4. Stop servers. + self.log_step("Stop servers.") + self.server_managers[0].system_stop() + + # 5. Copy /mnt/daos?/ from the engine where we created the pool to + # another engine where we didn’t create. + + # 5-1. Since we're running rsync as user, update the mode of the source pool + # directory to 777. + self.log_step("Update mode of the source pool directory.") + chmod_cmd = (f"sudo chmod 777 {src_mount}; " + f"sudo chmod -R 777 {src_mount}/{pool.uuid.lower()}") + if not run_remote(log=self.log, hosts=src_host, command=chmod_cmd).passed: + self.fail(f"Following command failed on {src_host}! {chmod_cmd}") + + # 5-2. Update mode of the destination mount point to 777 so that we can send the + # pool files. + self.log_step("Update mode of the destination mount point.") + chmod_cmd = f"sudo chmod 777 {dst_mount}" + if not run_remote(log=self.log, hosts=dst_host, command=chmod_cmd).passed: + self.fail(f"Following command failed on {dst_host}! {chmod_cmd}") + + # 5-3. Since we're sending each file (vos-0 to 7 + rdb-pool) one at a time, we need + # to create the destination fake pool directory first.
+ self.log_step("Create a fake pool directory at the destination mount point.") + mkdir_cmd = f"sudo mkdir {dst_mount}/{pool.uuid.lower()}" + if not run_remote(log=self.log, hosts=dst_host, command=mkdir_cmd).passed: + self.fail(f"Following command failed on {dst_host}! {mkdir_cmd}") + + # 5-4. Update mode of the destination pool directory to 777 so that we can send + # the pool files. + self.log_step("Update mode of the fake pool directory at destination.") + chmod_cmd = f"sudo chmod 777 {dst_mount}/{pool.uuid.lower()}" + if not run_remote(log=self.log, hosts=dst_host, command=chmod_cmd).passed: + self.fail(f"Following command failed on {dst_host}! {chmod_cmd}") + + # 5-5. Send the files. + # 1. The initial ls command lists the content of the pool directory, which + # contains 8 vos files (because there are 8 targets) and rdb-pool file. + # 2. By using xargs, each item of the ls output is passed into rsync and the rsync + # commands are executed in parallel. i.e., each file is sent by separate rsync + # process in parallel. + + # - More explanations about the command: + # * We use --max-procs=8 to support at most 8 rsync processes to run in parallel. + # * -I% means replace % in the following rsync command by the output of ls. i.e., + # file name. + # * rsync -avz means archive, verbose, and compress. By using compress, we can + # significantly reduce the size of the data and the transfer time. + # * By running rsync in parallel, we can significantly reduce the transfer time. + # e.g., For a 2TB pool with 8 targets per engine, each vos file size is about 7G + # (rdb-pool is smaller). If we run a simple rsync, which runs serially, it takes + # 1 min 50 sec. However, if we run them in parallel, it's reduced to 24 sec. 
+ self.log_step( + f"Copy pool files from {src_host}:{src_mount} to {dst_host}:{dst_mount}.") + xargs_rsync_cmd = (f"ls {src_mount}/{pool.uuid.lower()} | " + f"xargs --max-procs=8 -I% " + f"rsync -avz {src_mount}/{pool.uuid.lower()}/% " + f"{str(dst_host)}:{dst_mount}/{pool.uuid.lower()}") + if not run_remote(log=self.log, hosts=src_host, command=xargs_rsync_cmd).passed: + self.fail(f"Following command failed on {src_host}! {xargs_rsync_cmd}") + + # 6. Enable and start the checker. + self.log_step("Enable and start the checker.") + dmg_command.check_enable() + + # If we call check start immediately after check enable, checker may not detect + # the fault. Developer is fixing this issue. + time.sleep(3) + + dmg_command.check_start() + + # 7. Query the checker and verify that the issue was fixed. + # i.e., Current status is COMPLETED. + errors = [] + repair_reports = self.wait_for_check_complete() + query_msg = repair_reports[0]["msg"] + if "orphan rank" not in query_msg: + errors.append( + "Checker didn't fix orphan pool shard! msg = {}".format(query_msg)) + + # 8. Disable the checker. + self.log_step("Disable and start the checker.") + dmg_command.check_disable() + + # 9. Call dmg storage query usage to verify that the pool usage is back to the + # original value. + self.log_step("Verify that the pool usage is back to the original value.") + rank_to_free_fixed = self.get_rank_to_free() + self.log.info("rank_to_free_fixed = %s", rank_to_free_fixed) + dst_free_orig = rank_to_free_orig[dst_rank] + dst_free_fixed = rank_to_free_fixed[dst_rank] + # Free space should have been recovered to the original value. If not, bring it up + # in the CR working group. + if dst_free_fixed < dst_free_orig: + msg = (f"Destination rank space was not recovered by checker! " + f"Original = {dst_free_orig}; With fixed = {dst_free_fixed}") + errors.append(msg) + + report_errors(test=self, errors=errors) + + def test_dangling_pool_map(self): + """Test dangling pool map. + + 1. 
Create a pool. + 2. Stop servers. + 3. Manually remove ///vos-0 from rank 0 node. + 4. Enable and start the checker. + 5. Query the checker and verify that the issue was fixed. i.e., Current status is + COMPLETED. + 6. Disable the checker. + 7. Verify that the pool has one less target. + + Jira ID: DAOS-11736 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_membership + :avocado: tags=PoolMembershipTest,test_dangling_pool_map + """ + # 1. Create a pool. + self.log_step("Creating a pool (dmg pool create)") + pool = self.get_pool(connect=False) + + # 2. Stop servers. + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 3. Manually remove ///vos-0 from rank 0 node. + rank_0_host = NodeSet(self.server_managers[0].get_host(0)) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + rm_cmd = f"sudo rm {scm_mount}/{pool.uuid.lower()}/vos-0" + if not run_remote(log=self.log, hosts=rank_0_host, command=rm_cmd).passed: + self.fail(f"Following command failed on {rank_0_host}! {rm_cmd}") + + # 4. Enable and start the checker. + self.log_step("Enable and start the checker.") + dmg_command.check_enable(stop=False) + + # If we call check start immediately after check enable, checker may not detect + # the fault. Developer is fixing this issue. + time.sleep(3) + + # Start checker. + dmg_command.check_start() + + # 5. Query the checker and verify that the issue was fixed. + repair_reports = self.wait_for_check_complete() + + errors = [] + query_msg = repair_reports[0]["msg"] + if "dangling target" not in query_msg: + errors.append( + "Checker didn't fix orphan pool shard! msg = {}".format(query_msg)) + + # 6. Disable the checker. + self.log_step("Disable and start the checker.") + dmg_command.check_disable() + + # 7. Verify that the pool has one less target. 
+ query_out = pool.query() + total_targets = query_out["response"]["total_targets"] + active_targets = query_out["response"]["active_targets"] + expected_targets = total_targets - 1 + if active_targets != expected_targets: + msg = (f"Unexpected number of active targets! Expected = {expected_targets}; " + f"Actual = {active_targets}") + errors.append(msg) + + report_errors(test=self, errors=errors) + + def test_dangling_rank_entry(self): + """Test dangling target entry. + + 1. Create a pool and a container. + 2. Write some data with IOR using SX. + 3. Stop servers. + 4. Remove pool directory from one of the mount points. + 5. Enable checker. + 6. Start checker. + 7. Query the checker until expected number of inconsistencies are repaired. + 8. Disable checker and start servers. + + Jira ID: DAOS-11735 + + :avocado: tags=all,pr + :avocado: tags=hw,medium + :avocado: tags=recovery,pool_membership + :avocado: tags=PoolMembershipTest,test_dangling_rank_entry + """ + targets = self.params.get("targets", "/run/server_config/engines/0/*") + exp_msg = "dangling rank entry" + + # 1. Create a pool and a container. + self.log_step("Create a pool and a container.") + self.pool = self.get_pool(connect=False) + self.container = self.get_container(pool=self.pool) + + # 2. Write some data with IOR using SX. + self.log_step("Write some data with IOR.") + self.ior_cmd.set_daos_params( + self.server_group, self.pool, self.container.identifier) + self.run_ior_with_pool(create_pool=False, create_cont=False) + + # 3. Stop servers. + self.log_step("Stop servers.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + # 4. Remove pool directory from one of the mount points. 
+ self.log_step("Remove pool directory from one of the mount points.") + rank_1_host = NodeSet(self.server_managers[0].get_host(1)) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + rm_cmd = f"sudo rm -rf {scm_mount}/{self.pool.uuid.lower()}" + if not run_remote(log=self.log, hosts=rank_1_host, command=rm_cmd).passed: + self.fail(f"Following command failed on {rank_1_host}! {rm_cmd}") + + # 5. Enable checker. + self.log_step("Enable checker.") + dmg_command.check_enable(stop=False) + + # If we call check start immediately after check enable, checker may not detect + # the fault. Developer is fixing this issue. + time.sleep(3) + + # 6. Start checker. + self.log_step("Start checker.") + dmg_command.check_start() + + # 7. Query the checker until expected number of inconsistencies are repaired. + self.log_step( + "Query the checker until expected number of inconsistencies are repaired.") + repair_reports = self.wait_for_check_complete() + + # Verify that the checker repaired target count + 1 faults. (+1 is for rank. + # Checker marks it as down.) + errors = [] + repair_count = len(repair_reports) + expected_repair_count = targets + 1 + if repair_count != expected_repair_count: + msg = (f"Unexpected number of repair count! Expected = " + f"{expected_repair_count}, Actual = {repair_count}") + errors.append(msg) + + # Verify that the message contains dangling rank entry. + exp_msg_found = False + for repair_report in repair_reports: + if exp_msg in repair_report["msg"]: + exp_msg_found = True + break + if not exp_msg_found: + errors.append(f"{exp_msg} not in repair message!") + + # 8. Disable checker. 
+ self.log_step("Disable checker.") + dmg_command.check_disable() + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/pool_membership.yaml b/src/tests/ftest/recovery/pool_membership.yaml new file mode 100644 index 00000000000..b9f854e4d25 --- /dev/null +++ b/src/tests/ftest/recovery/pool_membership.yaml @@ -0,0 +1,39 @@ +hosts: + test_servers: 2 + +timeout: 360 + +server_config: + engines_per_host: 2 + engines: + 0: + storage: auto + fabric_iface_port: 31416 + log_file: daos_server_0.log + pinned_numa_node: 0 + fabric_iface: ib0 + targets: 8 + 1: + storage: auto + fabric_iface_port: 31516 + log_file: daos_server_1.log + pinned_numa_node: 1 + fabric_iface: ib1 + targets: 8 + +pool: + size: 100G + +container: + control_method: daos + type: POSIX + +ior: + client_processes: + ppn: 1 + flags: -k -D 10 -v -w -W + api: DFS + transfer_size: 1M + block_size: 10G + dfs_oclass: SX + dfs_dir_oclass: SX diff --git a/src/tests/ftest/util/daos_utils.py b/src/tests/ftest/util/daos_utils.py index 679ca432414..96166ce8294 100644 --- a/src/tests/ftest/util/daos_utils.py +++ b/src/tests/ftest/util/daos_utils.py @@ -369,6 +369,23 @@ def pool_list_attrs(self, pool, sys_name=None, verbose=False): ("pool", "list-attrs"), pool=pool, sys_name=sys_name, verbose=verbose) + def pool_list_containers(self, pool, sys_name=None): + """List containers in the pool. + + Args: + pool (str): pool label or UUID + sys_name (str): DAOS system name. Defaults to None. + + Returns: + dict: JSON output + + Raises: + CommandFailure: if the daos pool list-containers command fails. + + """ + return self._get_json_result( + ("pool", "list-containers"), pool=pool, sys_name=sys_name) + def container_query(self, pool, cont, sys_name=None): """Query a container. 
@@ -660,6 +677,25 @@ def container_list_attrs(self, pool, cont, sys_name=None, verbose=False): ("container", "list-attrs"), pool=pool, cont=cont, sys_name=sys_name, verbose=verbose) + def container_list_objects(self, pool, cont, sys_name=None): + """Call daos container list-objects. + + Args: + pool (str): Pool UUID or label + cont (str): Container UUID or label + sys_name (str, optional): DAOS system name context for servers. + Defaults to None. + + Returns: + dict: the daos json command output converted to a python dictionary + + Raises: + CommandFailure: if the daos container list-objects command fails. + + """ + return self._get_json_result( + ("container", "list-objects"), pool=pool, cont=cont, sys_name=sys_name) + def container_create_snap(self, pool, cont, snap_name=None, epoch=None, sys_name=None): """Call daos container create-snap. @@ -786,6 +822,30 @@ def object_query(self, pool, cont, oid, sys_name=None): return data + def faults_container(self, pool, cont, location, sys_name=None, path=None, rank=None, + frequency=None): + """Inject fault to a container. + + Args: + pool (str): pool label or UUID + cont (str): container name or UUID + location (str): Fault injection location + sys_name (str): DAOS system name. Defaults to None. + path (str): unified namespace path. Defaults to None. + rank (str): Rank to inject fault on (default: 4294967295). Defaults to None. + frequency (str): Fault injection frequency (default: once). Defaults to None. + + Returns: + dict: JSON output + + Raises: + CommandFailure: if the command fails. + + """ + return self._get_json_result( + ("faults", "container"), pool=pool, cont=cont, location=location, + sys_name=sys_name, path=path, rank=rank, frequency=frequency) + def filesystem_copy(self, src, dst, preserve_props=None): """Copy a POSIX container or path to another POSIX container or path. 
diff --git a/src/tests/ftest/util/daos_utils_base.py b/src/tests/ftest/util/daos_utils_base.py index 9130b214dac..855d17b9bab 100644 --- a/src/tests/ftest/util/daos_utils_base.py +++ b/src/tests/ftest/util/daos_utils_base.py @@ -27,6 +27,8 @@ def get_sub_command_class(self): self.sub_command_class = self.PoolSubCommand() elif self.sub_command.value == "container": self.sub_command_class = self.ContainerSubCommand() + elif self.sub_command.value == "faults": + self.sub_command_class = self.FaultsSubCommand() elif self.sub_command.value == "object": self.sub_command_class = self.ObjectSubCommand() elif self.sub_command.value == "filesystem": @@ -461,6 +463,55 @@ def __init__(self): self.acl_file = FormattedParameter("--acl-file={}") self.entry = FormattedParameter("--entry={}") + class FaultsSubCommand(CommandWithSubCommand): + """Defines an object for the daos faults sub command.""" + + def __init__(self): + """Create a daos faults subcommand object.""" + super().__init__("/run/daos/faults/*", "faults") + + def get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the daos faults sub command object.""" + if self.sub_command.value == "container": + self.sub_command_class = self.ContainerSubCommand() + elif self.sub_command.value == "set-param": + self.sub_command_class = self.SetParamSubCommand() + else: + self.sub_command_class = None + + class CommonFaultsSubCommand(CommandWithParameters): + """Defines an object for the common daos faults sub-command.""" + + def __init__(self, sub_command): + """Create a common daos faults sub-command object. 
+ + Args: + sub_command (str): sub-command name + """ + super().__init__("/run/daos/faults/{}/*".format(sub_command), sub_command) + self.rank = FormattedParameter("--rank={}") + self.frequency = FormattedParameter("--frequency={}", None) + self.location = FormattedParameter("--location={}", None) + + class ContainerSubCommand(CommonFaultsSubCommand): + """Defines an object for the daos faults container command.""" + + def __init__(self): + """Create a daos faults container object.""" + super().__init__("container") + self.pool = BasicParameter(None, position=1) + self.cont = BasicParameter(None, position=2) + self.sys_name = FormattedParameter("--sys-name={}", None) + self.path = FormattedParameter("--path={}", None) + + class SetParamSubCommand(CommonFaultsSubCommand): + """Defines an object for the daos faults set-param command.""" + + def __init__(self): + """Create a daos faults set-param object.""" + super().__init__("set-param") + class ObjectSubCommand(CommandWithSubCommand): """Defines an object for the daos object sub command.""" diff --git a/src/tests/ftest/util/ddb_utils.py b/src/tests/ftest/util/ddb_utils.py new file mode 100644 index 00000000000..cf994a51378 --- /dev/null +++ b/src/tests/ftest/util/ddb_utils.py @@ -0,0 +1,315 @@ +""" + (C) Copyright 2022 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import os + +from command_utils_base import BasicParameter, CommandWithParameters, FormattedParameter +from general_utils import run_pcmd + + +class DdbCommandBase(CommandWithParameters): + """Defines the basic structures of ddb command.""" + + def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): + """Defines the parameters for ddb. + + Args: + server_host (NodeSet): Server host to run the command. + path (str): path to the ddb command. + verbose (bool, optional): Display command output when run_pcmd is called. + Defaults to True. + timeout (int, optional): Command timeout (sec) used in run_pcmd. 
Defaults to + None. + sudo (bool, optional): Whether to run ddb with sudo. Defaults to True. + """ + super().__init__("/run/ddb/*", "ddb", path) + + # We need to run with sudo. + self.sudo = sudo + + self.host = server_host + + # Write mode that's necessary for the commands that alters the data such as load. + self.write_mode = FormattedParameter("-w", default=False) + + # Command to run on the VOS file that contains container, object info, etc. + self.single_command = BasicParameter(None, position=2) + + # VOS file path. + self.vos_path = BasicParameter(None, position=1) + + # Members needed for run_pcmd(). + self.verbose = verbose + self.timeout = timeout + + def __str__(self): + """Return the command with all of its defined parameters as a string. + + Returns: + str: the command with all the defined parameters + + """ + value = super().__str__() + if self.sudo: + value = " ".join(["sudo -n", value]) + return value + + def run(self): + """Run the command. + + Returns: + list: A list of dictionaries with each entry containing output, exit status, + and interrupted status common to each group of hosts. + + """ + return run_pcmd( + hosts=self.host, command=str(self), verbose=self.verbose, + timeout=self.timeout) + + +class DdbCommand(DdbCommandBase): + # pylint: disable=too-many-public-methods + """ddb command class. + + Component path is needed for most of the commands. They're in the form of: + [container]/[object]/[dkey]/[akey] + + Each component can be indexed by UUID, but indexing is usually more convenient. e.g., + "ls [0]/[1]" means index into the first container, second object, and list the dkeys + in it. Note that the order we add container, object, dkey/akey may not be consistent + with the indices, so it's better for tests to use the UUID. + """ + + def __init__(self, server_host, path, mount_point, pool_uuid, vos_file): + """Constructor that sets the common variables for sub-commands. + + Args: + server_host (NodeSet): Server host to run the command. 
+ path (str): Path to the ddb command. Pass in self.bin for our wolf/CI env. + mount_point (str): DAOS mount point where pool directory is created. e.g., + /mnt/daos, /mnt/daos0. + pool_uuid (str): Pool UUID. + vos_file (str): VOS file name that's located in /mnt/daos/. It's + usually in the form of vos-0, vos-1, and so on. + """ + super().__init__(server_host, path) + + # Construct the VOS file path where ddb will inject the command. + self.update_vos_path(mount_point, pool_uuid, vos_file) + + def update_vos_path(self, mount_point, pool_uuid, vos_file): + """Update the vos_path ddb command argument. + + Args: + mount_point (str): DAOS mount point where pool directory is created. e.g., + /mnt/daos, /mnt/daos0. + pool_uuid (str): Pool UUID. + vos_file (str): VOS file name that's located in /mnt/daos/. It's + usually in the form of vos-0, vos-1, and so on. + """ + vos_path = os.path.join(mount_point, pool_uuid.lower(), vos_file) + self.vos_path.update(vos_path, "vos_path") + + def list_component(self, component_path=None): + """Call ddb -R "ls " + + ls is similar to the Linux ls command. It lists objects inside the container, + dkeys inside the object, and so on. + + Args: + component_path (str): Component that comes after ls. e.g., [0]/[1] for first + container, second object. Defaults to None, in which case "ls" will be + called. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + cmd = ["ls"] + if component_path: + cmd.append(component_path) + self.write_mode.value = False + self.single_command.value = " ".join(cmd) + + return self.run() + + def value_dump(self, component_path, out_file_path): + """Call ddb -R "value_dump " + + dump_value writes the contents to the file. e.g., if akey is specified, its data + will be dumped. + + Args: + component_path (str): Component that comes after dump_value. e.g., + [0]/[1]/[1]/[0] to dump the data of the akey. 
+ out_file_path (str): Path where the file is saved. Pass in self.test_dir + + "my_out.txt" unless there's a specific reason. This will create a file in + /var/tmp/daos_testing//my_out.txt + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + self.single_command.value = " ".join( + ["value_dump", component_path, out_file_path]) + + return self.run() + + def value_load(self, component_path, load_file_path): + """Call ddb -w -R "value_load " + + load writes the given data into the container. e.g., + load new_data.txt [0]/[1]/[1]/[0] + will write the new_data into the akey. + + Args: + component_path (str): Component that comes after load. e.g., + [0]/[1]/[1]/[0] to write the data into the akey. + load_file_path (str): Path of the file that contains the data to load. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = True + self.single_command.value = " ".join( + ["value_load", load_file_path, component_path]) + + return self.run() + + def remove_component(self, component_path): + """Call ddb -w -R "rm " + + Args: + component_path (str): Component that comes after rm. e.g., [0]/[1] for first + container, second object. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = True + self.single_command.value = " ".join(["rm", component_path]) + + return self.run() + + def ilog_dump(self, component_path): + """Call ddb -R "ilog_dump " + + Args: + component_path (str): Component that comes after rm. e.g., [0]/[1]/[1] for + first container, second object, second dkey. Needs to be object or after. 
+ + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + self.single_command.value = " ".join(["ilog_dump", component_path]) + + return self.run() + + def ilog_commit(self, component_path): + """Call ddb -R "ilog_commit " + + Args: + component_path (str): Component that comes after rm. e.g., [0]/[1]/[1] for + first container, second object, second dkey. Needs to be object or after. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + self.single_command.value = " ".join(["ilog_commit", component_path]) + + return self.run() + + def ilog_clear(self, component_path): + """Call ddb -R "ilog_clear " + + Args: + component_path (str): Component that comes after rm. e.g., [0]/[1]/[1] for + first container, second object, second dkey. Needs to be object or after. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + self.single_command.value = " ".join(["ilog_clear", component_path]) + + return self.run() + + def superblock_dump(self, component_path): + """Call ddb -R "superblock_dump " + + Args: + component_path (str): Component that comes after dump_superblock. + e.g., [0]/[1]/[1] for first container, second object, second dkey. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + self.single_command.value = " ".join(["superblock_dump", component_path]) + + return self.run() + + def dtx_dump(self, component_path="[0]", committed=False, active=False): + """Call ddb -R "dtx_dump " + + committed and active can't be set at the same time. 
+ + Args: + component_path (str): Component that comes after dump_dtx. It doesn't matter + as long as it's valid. Defaults to [0]. + committed (str): -c flag. Defaults to False. + active (str): -a flag. Defaults to False. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = False + + commands = ["dtx_dump"] + if committed: + commands.append("-c") + if active: + commands.append("-a") + commands.append(component_path) + + self.single_command.value = " ".join(commands) + + return self.run() + + def dtx_cmt_clear(self, component_path="[0]"): + """Call ddb -R "dtx_cmt_clear " + + Args: + component_path (str): Component that comes after clear_cmt_dtx. It doesn't + matter as long as it's valid. Defaults to [0]. + + Returns: + dict: A list of dictionaries with each entry containing output, exit + status, and interrupted status common to each group of hosts. + + """ + self.write_mode.value = True + self.single_command.value = " ".join(["dtx_cmt_clear", component_path]) + + return self.run() diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 03ace88e36e..4ac141ae785 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1027,6 +1027,26 @@ def system_cleanup(self, machinename=None, verbose=True): return self._get_json_result( ("system", "cleanup"), machinename=machinename, verbose=verbose) + def system_clear_exclude(self, ranks, rank_hosts): + """Clear exclude ranks from system. + + Either ranks or rank_hosts is necessary. Pass in None to one of them. + + Args: + ranks (str): comma separated ranks to exclude. + rank_hosts (str): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + CommandFailure: if the dmg system clear-exclude command fails. 
+ + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("system", "clear-exclude"), ranks=ranks, rank_hosts=rank_hosts) + def system_query(self, ranks=None, verbose=True): """Query system to obtain the status of the servers. @@ -1110,6 +1130,26 @@ def system_erase(self): """ return self._get_json_result(("system", "erase")) + def system_exclude(self, ranks, rank_hosts): + """Exclude ranks from system. + + Either ranks or rank_hosts is necessary. Pass in None to one of them. + + Args: + ranks (str): comma separated ranks to exclude. + rank_hosts (str): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + CommandFailure: if the dmg system exclude command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("system", "exclude"), ranks=ranks, rank_hosts=rank_hosts) + def system_start(self, ranks=None): """Start the system. @@ -1360,6 +1400,146 @@ def version(self): """ return self._get_json_result(("version",)) + def check_enable(self, pool=None, stop=True): + """Call dmg check enable. + + Args: + pool (str): Pool label or UUID. Defaults to None. + stop (bool): Stop the system first before enabling checker. Defaults to True. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + if stop: + self.system_stop(force=True) + + return self._get_json_result(("check", "enable"), pool=pool) + + def check_set_policy(self, reset_defaults=False, all_interactive=False, policies=None): + """Call dmg check set-policy [options] [policies]. + + Args: + reset_defaults (bool, optional): Set all policies to default action. Defaults to False. + all_interactive (bool, optional): Set all policies to interactive. Defaults to False. + policies (str, optional): The policies for DAOS checker. Defaults to None. 
+ + Returns: + dict: the dmg json command output converted to a python dictionary. + + """ + return self._get_json_result( + ("check", "set-policy"), reset_defaults=reset_defaults, + all_interactive=all_interactive, policies=policies) + + def check_repair(self, seq_num, action): + """Call dmg check repair. + + Args: + seq_num (str): Pool ID to repair. + action (str): Repair action number. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("check", "repair"), seq_num=seq_num, action=action) + + def check_start(self, pool=None, dry_run=False, reset=False, failout=None, auto=None, + find_orphans=False, policies=None): + """Call dmg check start. + + Args: + pool (str): Pool label or UUID. Defaults to None. + dry_run (bool): Scan only; do not initiate repairs. Defaults to False. + reset (bool): Reset the system check state. Defaults to False. + failout (str): Stop on failure [on|off]. Defaults to None. + auto (str): Attempt to automatically repair problems [on|off]. Defaults to + None. + find_orphans (bool): Find orphaned pools. Defaults to False. + policies (str): Set repair policies. Defaults to None. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("check", "start"), pool=pool, dry_run=dry_run, reset=reset, failout=failout, + auto=auto, find_orphans=find_orphans, policies=policies) + + def check_stop(self, pool=None): + """Call dmg check stop. + + Args: + pool (str): Pool label or UUID. Defaults to None. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("check", "stop"), pool=pool) + + def check_query(self, pool=None): + """Call dmg check query. + + Args: + pool (str): Pool label or UUID. Defaults to None. 
+ + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("check", "query"), pool=pool) + + def check_disable(self, pool=None, start=True): + """Call dmg check disable. + + Args: + pool (str): Pool label or UUID. Defaults to None. + start (bool): Start the system after disabling checker. Defaults to True. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + res = self._get_json_result(("check", "disable"), pool=pool) + + if start: + self.system_start() + + return res + + def faults_mgmt_svc_pool(self, pool, checker_report_class): + """Call dmg faults mgmt-svc pool + + Args: + pool (str): Pool label or UUID. + checker_report_class (str): Fault type to inject such as + CIC_POOL_NONEXIST_ON_MS. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("faults", "mgmt-svc", "pool"), pool=pool, + checker_report_class=checker_report_class) + + def faults_pool_svc(self, pool, checker_report_class): + """Call dmg faults pool-svc + + Args: + pool (str): Pool label or UUID. + checker_report_class (str): Fault type to inject such as + CIC_POOL_NONEXIST_ON_ENGINE. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("faults", "pool-svc"), pool=pool, checker_report_class=checker_report_class) + def check_system_query_status(data): """Check if any server crashed. 
diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index 773ff48ed2b..c3e7958d534 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -87,6 +87,8 @@ def get_sub_command_class(self): self.sub_command_class = self.ConfigSubCommand() elif self.sub_command.value == "cont": self.sub_command_class = self.ContSubCommand() + elif self.sub_command.value == "faults": + self.sub_command_class = self.FaultsSubCommand() elif self.sub_command.value == "network": self.sub_command_class = self.NetworkSubCommand() elif self.sub_command.value == "pool": @@ -126,6 +128,8 @@ def get_sub_command_class(self): self.sub_command_class = self.QuerySubCommand() elif self.sub_command.value == "repair": self.sub_command_class = self.RepairSubCommand() + elif self.sub_command.value == "set-policy": + self.sub_command_class = self.SetpolicySubCommand() elif self.sub_command.value == "start": self.sub_command_class = self.StartSubCommand() elif self.sub_command.value == "stop": @@ -175,6 +179,16 @@ def __init__(self): self.action = BasicParameter(None, position=2) self.for_all = FormattedParameter("--for-all", False) + class SetpolicySubCommand(CommandWithParameters): + """Defines an object for the dmg check set-policy command.""" + + def __init__(self): + """Create a dmg check set-policy object.""" + super().__init__("/run/dmg/check/start/*", "set-policy") + self.reset_defaults = FormattedParameter("--reset-defaults", False) + self.all_interactive = FormattedParameter("--all-interactive", False) + self.policies = FormattedParameter("--policies={}", None) + class StartSubCommand(CommandWithParameters): """Defines an object for the dmg check start command.""" @@ -184,8 +198,10 @@ def __init__(self): self.pool = BasicParameter(None, position=1) self.dry_run = FormattedParameter("--dry-run", False) self.reset = FormattedParameter("--reset", False) - self.failout = FormattedParameter("--failout", False) - 
self.auto = FormattedParameter("--auto", False) + self.failout = FormattedParameter("--failout={}", None) + self.auto = FormattedParameter("--auto={}", None) + self.find_orphans = FormattedParameter("--find-orphans", False) + self.policies = FormattedParameter("--policies={}", None) class StopSubCommand(CommandWithParameters): """Defines an object for the dmg check stop command.""" @@ -200,8 +216,7 @@ class ConfigSubCommand(CommandWithSubCommand): def __init__(self): """Create a dmg config subcommand object.""" - super(DmgCommandBase.ConfigSubCommand, self).__init__( - "run/dmg/config/*", "config") + super(DmgCommandBase.ConfigSubCommand, self).__init__("run/dmg/config/*", "config") def get_sub_command_class(self): # pylint: disable=redefined-variable-type @@ -218,10 +233,8 @@ def __init__(self): """Create a dmg config generate object.""" super( DmgCommandBase.ConfigSubCommand.GenerateSubCommand, - self).__init__( - "/run/dmg/config/generate/*", "generate") - self.access_points = FormattedParameter( - "--access-points={}", None) + self).__init__("/run/dmg/config/generate/*", "generate") + self.access_points = FormattedParameter("--access-points={}", None) self.num_engines = FormattedParameter("--num-engines={}", None) self.scm_only = FormattedParameter("--scm-only", False) self.net_class = FormattedParameter("--net-class={}", None) @@ -256,6 +269,77 @@ def __init__(self): self.user = FormattedParameter("--user={}", None) self.group = FormattedParameter("--group={}", None) + class FaultsSubCommand(CommandWithSubCommand): + """Defines an object for the dmg faults sub command.""" + + def __init__(self): + """Create a dmg faults subcommand object.""" + super(DmgCommandBase.FaultsSubCommand, self).__init__("run/dmg/faults/*", "faults") + + def get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the dmg faults sub command object.""" + if self.sub_command.value == "add-checker-report": + self.sub_command_class = 
self.AddCheckerReportSubCommand() + elif self.sub_command.value == "mgmt-svc": + self.sub_command_class = self.MgmtSvcSubCommand() + elif self.sub_command.value == "pool-svc": + self.sub_command_class = self.PoolSvcSubCommand() + else: + self.sub_command_class = None + + class AddCheckerReportSubCommand(CommandWithParameters): + """Defines an object for the dmg faults add-checker-report command.""" + + def __init__(self): + """Create a dmg faults add-checker-report object.""" + super( + DmgCommandBase.FaultsSubCommand.AddCheckerReportSubCommand, + self).__init__("/run/dmg/faults/add-checker-report/*", "add-checker-report") + self.file = FormattedParameter("--file={}", None) + self.checker_report_class = FormattedParameter("--class={}", None) + + class MgmtSvcSubCommand(CommandWithSubCommand): + """Defines an object for the dmg faults mgmt-svc command.""" + + def __init__(self): + """Create a dmg faults mgmt-svc object.""" + super( + DmgCommandBase.FaultsSubCommand.MgmtSvcSubCommand, + self).__init__("/run/dmg/faults/mgmt-svc/*", "mgmt-svc") + + def get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the dmg faults mgmt-svc sub command object.""" + if self.sub_command.value == "pool": + self.sub_command_class = self.PoolSubCommand() + else: + self.sub_command_class = None + + class PoolSubCommand(CommandWithParameters): + """Defines an object for the dmg faults mgmt-svc pool command.""" + + def __init__(self): + """Create a dmg faults mgmt-svc pool command object.""" + super().__init__("/run/dmg/faults/mgmt-svc/pool/*", "pool") + self.pool = BasicParameter(None, position=1) + self.checker_report_class = BasicParameter(None, position=2) + self.svcl = FormattedParameter("--svcl={}", None) + self.label = FormattedParameter("--label={}", None) + + class PoolSvcSubCommand(CommandWithParameters): + """Defines an object for the dmg faults pool-svc command.""" + + def __init__(self): + """Create a dmg faults pool-svc object.""" + super( + 
DmgCommandBase.FaultsSubCommand.PoolSvcSubCommand, + self).__init__("/run/dmg/faults/pool-svc/*", "pool-svc") + self.pool = BasicParameter(None, position=1) + self.checker_report_class = BasicParameter(None, position=2) + self.svcl = FormattedParameter("--svcl={}", None) + self.label = FormattedParameter("--label={}", None) + class NetworkSubCommand(CommandWithSubCommand): """Defines an object for the dmg network sub command.""" @@ -436,8 +520,7 @@ class OverwriteAclSubCommand(CommandWithParameters): def __init__(self): """Create a dmg pool overwrite-acl command object.""" - super().__init__( - "/run/dmg/pool/overwrite-acl/*", "overwrite-acl") + super().__init__("/run/dmg/pool/overwrite-acl/*", "overwrite-acl") self.pool = BasicParameter(None, position=1) self.acl_file = FormattedParameter("-a {}", None) @@ -749,8 +832,12 @@ def get_sub_command_class(self): """Get the dmg system sub command object.""" if self.sub_command.value == "cleanup": self.sub_command_class = self.CleanupSubCommand() + elif self.sub_command.value == "clear-exclude": + self.sub_command_class = self.ClearExcludeSubCommand() elif self.sub_command.value == "erase": self.sub_command_class = self.EraseSubCommand() + elif self.sub_command.value == "exclude": + self.sub_command_class = self.ExcludeSubCommand() elif self.sub_command.value == "leader-query": self.sub_command_class = self.LeaderQuerySubCommand() elif self.sub_command.value == "list-pools": @@ -773,21 +860,37 @@ def __init__(self): self.machinename = FormattedParameter("{}", None) self.verbose = FormattedParameter("--verbose", False) + class ClearExcludeSubCommand(CommandWithParameters): + """Defines an object for the dmg system clear-exclude command.""" + + def __init__(self): + """Create a dmg system clear-exclude command object.""" + super().__init__("/run/dmg/system/clear-exclude/*", "clear-exclude") + self.ranks = FormattedParameter("--ranks={}") + self.rank_hosts = FormattedParameter("--rank-hosts={}") + class 
EraseSubCommand(CommandWithParameters): """Defines an object for the dmg system erase command.""" def __init__(self): """Create a dmg system erase command object.""" - super().__init__( - "/run/dmg/system/erase/*", "erase") + super().__init__("/run/dmg/system/erase/*", "erase") + + class ExcludeSubCommand(CommandWithParameters): + """Defines an object for the dmg system exclude command.""" + + def __init__(self): + """Create a dmg system exclude command object.""" + super().__init__("/run/dmg/system/exclude/*", "exclude") + self.ranks = FormattedParameter("--ranks={}") + self.rank_hosts = FormattedParameter("--rank-hosts={}") class LeaderQuerySubCommand(CommandWithParameters): """Defines an object for the dmg system leader-query command.""" def __init__(self): """Create a dmg system leader-query command object.""" - super().__init__( - "/run/dmg/system/leader-query/*", "leader-query") + super().__init__("/run/dmg/system/leader-query/*", "leader-query") class ListPoolsSubCommand(CommandWithParameters): """Defines an object for the dmg system list-pools command.""" @@ -860,8 +963,7 @@ class ListSubCommand(CommandWithParameters): def __init__(self): """Create a dmg telemetry metrics list object.""" - super().__init__( - "/run/dmg/telemetry/metrics/list/*", "list") + super().__init__("/run/dmg/telemetry/metrics/list/*", "list") self.host = FormattedParameter("--host-list={}", None) self.port = FormattedParameter("--port={}", None) @@ -870,8 +972,7 @@ class QuerySubCommand(CommandWithParameters): def __init__(self): """Create a dmg telemetry metrics query object.""" - super().__init__( - "/run/dmg/telemetry/metrics/query/*", "query") + super().__init__("/run/dmg/telemetry/metrics/query/*", "query") self.host = FormattedParameter("--host-list={}", None) self.port = FormattedParameter("--port={}", None) self.metrics = FormattedParameter("--metrics={}", None) @@ -881,8 +982,7 @@ class VersionSubCommand(CommandWithSubCommand): def __init__(self): """Create a dmg version 
subcommand object.""" - super(DmgCommandBase.VersionSubCommand, self).__init__( - "/run/dmg/version/*", "version") + super(DmgCommandBase.VersionSubCommand, self).__init__("/run/dmg/version/*", "version") def _get_new(self): """Get a new object based upon this one. diff --git a/src/tests/ftest/util/recovery_test_base.py b/src/tests/ftest/util/recovery_test_base.py new file mode 100644 index 00000000000..c67fe46d7d3 --- /dev/null +++ b/src/tests/ftest/util/recovery_test_base.py @@ -0,0 +1,73 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import os +import time + +from apricot import TestWithServers +from ClusterShell.NodeSet import NodeSet +from run_utils import run_remote + + +class RecoveryTestBase(TestWithServers): + # pylint: disable=no-member + """Recovery test cases. + + Test Class Description: + Used for recovery tests. + + :avocado: recursive + """ + + def get_vos_file_path(self, pool): + """Get the VOS file path. + + If there are multiple VOS files, returns the first file obtained by "ls". + + Args: + pool (TestPool): Pool. + + Returns: + str: VOS file path such as /mnt/daos0//vos-0 + + """ + hosts = NodeSet(self.hostlist_servers[0]) + scm_mount = self.server_managers[0].get_config_value("scm_mount") + vos_path = os.path.join(scm_mount, pool.uuid.lower()) + command = " ".join(["sudo", "ls", vos_path]) + cmd_out = run_remote(log=self.log, hosts=hosts, command=command) + + # return vos_file + for file in cmd_out.output[0].stdout: + # Assume the VOS file has "vos" in the file name. + if "vos" in file: + self.log.info("vos_file: %s", file) + return file + + self.fail("vos file wasn't found in {}/{}".format(scm_mount, pool.uuid.lower())) + + return None # to appease pylint + + def wait_for_check_complete(self): + """Repeatedly call dmg check query until status becomes COMPLETED. + + If the status doesn't become COMPLETED, fail the test. + + Returns: + list: List of repair reports. 
+ + """ + repair_reports = None + for _ in range(8): + check_query_out = self.get_dmg_command().check_query() + if check_query_out["response"]["status"] == "COMPLETED": + repair_reports = check_query_out["response"]["reports"] + break + time.sleep(5) + + if not repair_reports: + self.fail("Checker didn't detect or repair any inconsistency!") + + return repair_reports diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 8824e89af5d..2be8cdb8b3b 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -1021,6 +1021,71 @@ def kill(self): # set stopped servers state to make teardown happy self.update_expected_states(None, ["stopped", "excluded", "errored"]) + @fail_on(CommandFailure) + def system_exclude(self, ranks, copy=False, rank_hosts=None): + """Exclude the specific server ranks. + + Args: + ranks (list): a list of daos server ranks (int) to exclude + copy (bool, optional): Copy dmg command. Defaults to False. + rank_hosts (str): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + avocado.core.exceptions.TestFail: if there is an issue excluding the server + ranks. + + """ + msg = "Excluding DAOS ranks {} from server group {}".format( + ranks, self.get_config_value("name")) + self.log.info(msg) + + # Exclude desired ranks using dmg. + if copy: + self.dmg.copy().system_exclude( + ranks=list_to_str(value=ranks), rank_hosts=rank_hosts) + else: + self.dmg.system_exclude(ranks=list_to_str(value=ranks), rank_hosts=rank_hosts) + + # Update the expected status of the excluded ranks + self.update_expected_states(ranks, "adminexcluded") + + # Verify current state is adminexcluded. + self.check_rank_state(ranks=ranks, valid_states=["adminexcluded"]) + + @fail_on(CommandFailure) + def system_clear_exclude(self, ranks, copy=False, rank_hosts=None): + """Clear the exclusion of the specific server ranks. 
+ + Args: + ranks (list): a list of daos server ranks (int) to clear the exclusion + copy (bool, optional): Copy dmg command. Defaults to False. + rank_hosts (str): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + avocado.core.exceptions.TestFail: if there is an issue clearing the exclusion + of the server ranks. + + """ + msg = "Clear the exclusion for DAOS ranks {} from server group {}".format( + ranks, self.get_config_value("name")) + self.log.info(msg) + + # Clear the exclusion for desired ranks using dmg. + if copy: + self.dmg.copy().system_clear_exclude( + ranks=list_to_str(value=ranks), rank_hosts=rank_hosts) + else: + self.dmg.system_clear_exclude( + ranks=list_to_str(value=ranks), rank_hosts=rank_hosts) + + # Update the expected status of the excluded ranks + self.update_expected_states(ranks, "excluded") + + # Verify current state is excluded. + self.check_rank_state(ranks=ranks, valid_states=["excluded"]) + def get_host(self, rank): """Get the host name that matches the specified rank. diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index ec377e0977d..a118227fabd 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -41,7 +41,7 @@ def scons(): daos_obj_array.c daos_obj.c daos_oid_alloc.c daos_pool.c daos_rebuild.c daos_rebuild_common.c daos_rebuild_ec.c daos_rebuild_simple.c daos_test.c daos_verify_consistency.c - daos_aggregate_ec.c daos_degrade_ec.c + daos_aggregate_ec.c daos_degrade_ec.c daos_cr.c daos_extend_simple.c daos_obj_ec.c daos_upgrade.c daos_pipeline.c""") daostest = newenv.d_program('daos_test', c_files + daos_test_tgt, diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c new file mode 100644 index 00000000000..ad401824df3 --- /dev/null +++ b/src/tests/suite/daos_cr.c @@ -0,0 +1,3591 @@ +/** + * (C) Copyright 2023 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of daos, basic testing for catastrophic recovery. + */ +#define D_LOGFAC DD_FAC(tests) +#include "daos_test.h" + +#include +#include +#include +#include +#include +#include + +#include + +/* + * Will enable accurate query result verification after DAOS-13520 resolved. + * #define CR_ACCURATE_QUERY_RESULT 1 + */ + +/* Start pool service may take sometime, let's wait for at most CR_WAIT_MAX * 2 seconds. */ +#define CR_WAIT_MAX (45) +/* 256MB for CR pool size. */ +#define CR_POOL_SIZE (1 << 28) + +struct test_cont { + uuid_t uuid; + char label[DAOS_PROP_LABEL_MAX_LEN]; +}; + +/* Instance Status */ + +static inline bool +cr_ins_status_init(const char *status) +{ + return status != NULL && strcmp(status, "INIT") == 0; +} + +static inline bool +cr_ins_status_running(const char *status) +{ + return status != NULL && strcmp(status, "RUNNING") == 0; +} + +static inline bool +cr_ins_status_completed(const char *status) +{ + return status != NULL && strcmp(status, "COMPLETED") == 0; +} + +static inline bool +cr_ins_status_stopped(const char *status) +{ + return status != NULL && strcmp(status, "STOPPED") == 0; +} + +static inline bool +cr_ins_status_failed(const char *status) +{ + return status != NULL && strcmp(status, "FAILED") == 0; +} + +static inline bool +cr_ins_status_paused(const char *status) +{ + return status != NULL && strcmp(status, "PAUSED") == 0; +} + +static inline bool +cr_ins_status_implicated(const char *status) +{ + return status != NULL && strcmp(status, "IMPLICATED") == 0; +} + +/* Instance Scan Phase */ + +static inline bool +cr_ins_phase_is_prepare(const char *phase) +{ + return phase != NULL && strcmp(phase, "PREPARE") == 0; +} + +static inline bool +cr_ins_phase_is_done(const char *phase) +{ + return phase != NULL && strcmp(phase, "DONE") == 0; +} + +/* Pool Status */ + +static inline bool +cr_pool_status_unchecked(const char *status) +{ + return status != NULL && 
strcmp(status, "CPS_UNCHECKED") == 0; +} + +static inline bool +cr_pool_status_checking(const char *status) +{ + return status != NULL && strcmp(status, "CPS_CHECKING") == 0; +} + +static inline bool +cr_pool_status_checked(const char *status) +{ + return status != NULL && strcmp(status, "CPS_CHECKED") == 0; +} + +static inline bool +cr_pool_status_failed(const char *status) +{ + return status != NULL && strcmp(status, "CPS_FAILED") == 0; +} + +static inline bool +cr_pool_status_paused(const char *status) +{ + return status != NULL && strcmp(status, "CPS_PAUSED") == 0; +} + +static inline bool +cr_pool_status_pending(const char *status) +{ + return status != NULL && strcmp(status, "CPS_PENDING") == 0; +} + +static inline bool +cr_pool_status_stopped(const char *status) +{ + return status != NULL && strcmp(status, "CPS_STOPPED") == 0; +} + +static inline bool +cr_pool_status_implicated(const char *status) +{ + return status != NULL && strcmp(status, "CPS_IMPLICATED") == 0; +} + +/* Pool Scan Phase */ + +static inline bool +cr_pool_phase_is_prepare(const char *phase) +{ + return phase != NULL && strcmp(phase, "CSP_PREPARE") == 0; +} + +static inline bool +cr_pool_phase_is_done(const char *phase) +{ + return phase != NULL && strcmp(phase, "CSP_DONE") == 0; +} + +static inline void +cr_dump_pools(uint32_t pool_nr, uuid_t uuids[]) +{ + int i; + + if (pool_nr > 0) { + print_message("For the following %d pool(s):\n", pool_nr); + for (i = 0; i < pool_nr; i++) + print_message(DF_UUIDF "\n", DP_UUID(uuids[i])); + } +} + +/* dmg command */ + +static inline int +cr_debug_set_params_internal(test_arg_t *arg, uint64_t fail_loc, bool nowait) +{ + int rc; + int i = 0; + + /* The system maybe just started, wait for a while for primary group initialization. 
*/ + if (fail_loc != 0 && !nowait) + sleep(5); + + for (i = 0; i < 10; i++) { + rc = daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, fail_loc, 0, NULL); + if (rc == 0 || rc != -DER_TIMEDOUT || nowait) + break; + + sleep(2); + } + + print_message("CR: set fail_loc as " DF_X64 ": " DF_RC "\n", fail_loc, DP_RC(rc)); + + return rc; +} + +static inline int +cr_debug_set_params(test_arg_t *arg, uint64_t fail_loc) +{ + return cr_debug_set_params_internal(arg, fail_loc, false); +} + +static inline int +cr_debug_set_params_nowait(test_arg_t *arg, uint64_t fail_loc) +{ + return cr_debug_set_params_internal(arg, fail_loc, true); +} + +static inline int +cr_fault_inject(uuid_t uuid, bool mgmt, const char *fault) +{ + int rc; + + print_message("CR: injecting fault %s for pool " DF_UUID "\n", fault, DP_UUID(uuid)); + rc = dmg_fault_inject(dmg_config_file, uuid, mgmt, fault); + if (rc != 0) + print_message("CR: pool " DF_UUID " inject fault %s failed: "DF_RC"\n", + DP_UUID(uuid), fault, DP_RC(rc)); + + return rc; +} + +static inline int +cr_mode_switch(bool enable) +{ + print_message("CR: %s check mode\n", enable ? "enable" : "disable"); + return dmg_check_switch(dmg_config_file, enable); +} + +static inline int +cr_system_start(void) +{ + print_message("CR: starting system ...\n"); + return dmg_system_start_rank(dmg_config_file, CRT_NO_RANK); +} + +static inline int +cr_system_stop(bool force) +{ + print_message("CR: stopping system with %s ...\n", force ? 
"force" : "non-force"); + return dmg_system_stop_rank(dmg_config_file, CRT_NO_RANK, force); +} + +static inline int +cr_rank_reint(uint32_t rank, bool start) +{ + int rc; + + print_message("CR: reintegrating the rank %u ...\n", rank); + rc = dmg_system_reint_rank(dmg_config_file, rank); + if (rc != 0) + return rc; + + if (start) { + print_message("CR: starting the rank %u ...\n", rank); + rc = dmg_system_start_rank(dmg_config_file, rank); + } + + return rc; +} + +static inline int +cr_rank_exclude(test_arg_t *arg, struct test_pool *pool, int *rank) +{ + int count; + int rc; + int i; + int j; + + D_ASSERT(pool->svc != NULL); + + /* + * The check leader (elected by control plane, usually on rank 0) and + * PS leader maybe on different ranks, do not exclude such two ranks. + */ + count = pool->svc->rl_nr + 2; + if (!test_runable(arg, count)) { + print_message("Need enough targets (%u/%u vs %d) for test, skip\n", + arg->srv_nnodes, arg->srv_ntgts, count); + return 1; + } + + for (i = 1, *rank = -1; i < count && *rank < 0; i++) { + for (j = 0; j < pool->svc->rl_nr; j++) { + if (pool->svc->rl_ranks[j] == i) + break; + } + + if (j >= pool->svc->rl_nr) + *rank = i; + } + + D_ASSERT(*rank >= 0); + + rc = cr_debug_set_params(arg, DAOS_CHK_ENGINE_DEATH | DAOS_FAIL_ALWAYS); + if (rc != 0) + return rc; + + print_message("CR: stopping the rank %d ...\n", *rank); + rc = dmg_system_stop_rank(dmg_config_file, *rank, false); + if (rc != 0) + return rc; + + /* The *rank is stopped, that may cause set_params to timeout, do not wait. */ + cr_debug_set_params_nowait(arg, 0); + + print_message("CR: excluding the rank %d ...\n", *rank); + return dmg_system_exclude_rank(dmg_config_file, *rank); +} + +static inline int +cr_check_start(uint32_t flags, uint32_t pool_nr, uuid_t uuids[], const char *policies) +{ + print_message("CR: starting checker with flags %x, policies %s ...\n", flags, + policies != NULL ? 
policies : "(null)"); + cr_dump_pools(pool_nr, uuids); + + return dmg_check_start(dmg_config_file, flags, pool_nr, uuids, policies); +} + +static inline int +cr_check_stop(uint32_t pool_nr, uuid_t uuids[]) +{ + print_message("CR: stopping checker ...\n"); + cr_dump_pools(pool_nr, uuids); + return dmg_check_stop(dmg_config_file, pool_nr, uuids); +} + +static inline int +cr_check_query(uint32_t pool_nr, uuid_t uuids[], struct daos_check_info *dci) +{ + print_message("CR: query checker ...\n"); + cr_dump_pools(pool_nr, uuids); + return dmg_check_query(dmg_config_file, pool_nr, uuids, dci); +} + +static inline int +cr_check_repair(uint64_t seq, uint32_t opt, bool for_all) +{ + print_message("CR: handle check interaction for seq %lu, option %u ...\n", + (unsigned long)seq, opt); + return dmg_check_repair(dmg_config_file, seq, opt, for_all); +} + +static inline int +cr_check_set_policy(uint32_t flags, const char *policies) +{ + print_message("CR: set checker policy with flags %x, policy %s ...\n", + flags, policies != NULL ? 
policies : "(null)"); + return dmg_check_set_policy(dmg_config_file, flags, policies); +} + +static struct daos_check_report_info * +cr_locate_dcri(struct daos_check_info *dci, struct daos_check_report_info *base, uuid_t uuid) +{ + struct daos_check_report_info *last = &dci->dci_reports[dci->dci_report_nr - 1]; + struct daos_check_report_info *dcri = NULL; + bool found = false; + + if (base != NULL) + dcri = base + 1; + else + dcri = &dci->dci_reports[0]; + + while (dcri <= last) { + if (uuid_compare(dcri->dcri_uuid, uuid) == 0) { + found = true; + break; + } + + dcri++; + } + + D_ASSERTF(found, "Cannot found inconsistency report for "DF_UUIDF"\n", DP_UUID(uuid)); + + return dcri; +} + +static void +cr_dci_fini(struct daos_check_info *dci) +{ + int i; + + D_FREE(dci->dci_status); + D_FREE(dci->dci_phase); + + if (dci->dci_pools != NULL) { + for (i = 0; i < dci->dci_pool_nr; i++) { + D_FREE(dci->dci_pools[i].dcpi_status); + D_FREE(dci->dci_pools[i].dcpi_phase); + } + + D_FREE(dci->dci_pools); + } + + D_FREE(dci->dci_reports); +} + +static void +cr_cleanup(test_arg_t *arg, struct test_pool *pools, uint32_t nr) +{ + int rc; + int i; + + for (i = 0; i < nr; i++) { + d_rank_list_free(pools[i].svc); + d_rank_list_free(pools[i].alive_svc); + D_FREE(pools[i].label); + + if (uuid_is_null(pools[i].pool_uuid) || pools[i].destroyed) + continue; + + if (daos_handle_is_valid(pools[i].poh)) { + print_message("CR: disconnecting pool " DF_UUID "\n", + DP_UUID(pools[i].pool_uuid)); + /* + * The connection may have already been evicted by checker. So disconnect() + * may fail. It is not fatal as long as there is not corruption. 
+ */ + daos_pool_disconnect(pools[i].poh, NULL); + } + + rc = dmg_pool_destroy(dmg_config_file, pools[i].pool_uuid, arg->group, 1); + if (rc != 0 && rc != -DER_NONEXIST && rc != -DER_MISC) + print_message("CR: dmg_pool_destroy failed: "DF_RC"\n", DP_RC(rc)); + } +} + +static void +cr_ins_wait(uint32_t pool_nr, uuid_t uuids[], struct daos_check_info *dci) +{ + int rc; + int i; + + print_message("CR: waiting check instance ...\n"); + + for (i = 0; i < CR_WAIT_MAX; i++) { + cr_dci_fini(dci); + + rc = dmg_check_query(dmg_config_file, pool_nr, uuids, dci); + assert_rc_equal(rc, 0); + + if (!cr_ins_status_init(dci->dci_status) && !cr_ins_status_running(dci->dci_status)) + break; + + sleep(2); + } +} + +static void +cr_pool_wait(uint32_t pool_nr, uuid_t uuids[], struct daos_check_info *dci) +{ + int rc; + int i; + + print_message("CR: waiting check pool ...\n"); + cr_dump_pools(pool_nr, uuids); + + for (i = 0; i < CR_WAIT_MAX; i++) { + cr_dci_fini(dci); + + rc = dmg_check_query(dmg_config_file, pool_nr, uuids, dci); + assert_rc_equal(rc, 0); + + if (!cr_ins_status_init(dci->dci_status) && dci->dci_pools != NULL && + !cr_pool_status_checking(dci->dci_pools[0].dcpi_status)) + break; + + sleep(2); + } +} + +static int +cr_ins_verify(struct daos_check_info *dci, uint32_t exp_status) +{ + print_message("CR: verify instance status, expected %u\n", exp_status); + + switch (exp_status) { + case TCIS_INIT: + if (!cr_ins_status_init(dci->dci_status)) { + print_message("CR instance status %s is not init\n", dci->dci_status); + return -DER_INVAL; + } + if (!cr_ins_phase_is_prepare(dci->dci_phase)) { + print_message("CR instance phase %s is not prepare\n", dci->dci_phase); + return -DER_INVAL; + } + break; + case TCIS_RUNNING: + if (!cr_ins_status_running(dci->dci_status)) { + print_message("CR instance status %s is not running\n", dci->dci_status); + return -DER_INVAL; + } + break; + case TCIS_COMPLETED: + if (!cr_ins_status_completed(dci->dci_status)) { + print_message("CR instance 
status %s is not completed\n", dci->dci_status); + return -DER_INVAL; + } + if (!cr_ins_phase_is_done(dci->dci_phase)) { + print_message("CR instance phase %s is not done\n", dci->dci_phase); + return -DER_INVAL; + } + break; + case TCIS_STOPPED: + if (!cr_ins_status_stopped(dci->dci_status)) { + print_message("CR instance status %s is not stopped\n", dci->dci_status); + return -DER_INVAL; + } + if (cr_ins_phase_is_done(dci->dci_phase)) { + print_message("CR instance phase should not be done\n"); + return -DER_INVAL; + } + break; + case TCIS_FAILED: + if (!cr_ins_status_failed(dci->dci_status)) { + print_message("CR instance status %s is not failed\n", dci->dci_status); + return -DER_INVAL; + } + if (cr_ins_phase_is_done(dci->dci_phase)) { + print_message("CR instance phase should not be done\n"); + return -DER_INVAL; + } + break; + case TCIS_PAUSED: + if (!cr_ins_status_paused(dci->dci_status)) { + print_message("CR instance status %s is not paused\n", dci->dci_status); + return -DER_INVAL; + } + if (cr_ins_phase_is_done(dci->dci_phase)) { + print_message("CR instance phase should not be done\n"); + return -DER_INVAL; + } + break; + case TCIS_IMPLICATED: + if (!cr_ins_status_implicated(dci->dci_status)) { + print_message("CR instance status %s is not implicated\n", dci->dci_status); + return -DER_INVAL; + } + if (cr_ins_phase_is_done(dci->dci_phase)) { + print_message("CR instance phase should not be done\n"); + return -DER_INVAL; + } + break; + default: + print_message("CR: invalid expected instance status %d\n", exp_status); + break; + } + + return 0; +} + +static int +cr_pool_verify(struct daos_check_info *dci, uuid_t uuid, uint32_t exp_status, + uint32_t inconsistency_nr, uint32_t *classes, uint32_t *actions, int *exp_results) +{ + struct daos_check_pool_info *dcpi; + struct daos_check_report_info *dcri; + int result; + int i; + int j; + + print_message("CR: verify pool " DF_UUID " status, expected %u, inconsistency_nr %u\n", + DP_UUID(uuid), exp_status, 
inconsistency_nr); + + if (dci->dci_pool_nr != 1) { + print_message("CR pool count %d (pool " DF_UUID ") is not 1\n", + dci->dci_pool_nr, DP_UUID(uuid)); + return -DER_INVAL; + } + + dcpi = &dci->dci_pools[0]; + D_ASSERTF(uuid_compare(dcpi->dcpi_uuid, uuid) == 0, + "Unmatched pool UUID (1): " DF_UUID " vs " DF_UUID "\n", + DP_UUID(dcpi->dcpi_uuid), DP_UUID(uuid)); + + switch (exp_status) { + case TCPS_UNCHECKED: + if (!cr_pool_status_unchecked(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not unchecked\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (!cr_pool_phase_is_prepare(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase %s is not prepare\n", + DP_UUID(uuid), dcpi->dcpi_phase); + return -DER_INVAL; + } + break; + case TCPS_CHECKING: + if (!cr_pool_status_checking(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not checking\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + break; + case TCPS_CHECKED: + if (!cr_pool_status_checked(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not checked\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (inconsistency_nr == 0 && !cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase %s is not done\n", + DP_UUID(uuid), dcpi->dcpi_phase); + return -DER_INVAL; + } + break; + case TCPS_FAILED: + if (!cr_pool_status_failed(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not failed\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase should not be done\n", + DP_UUID(uuid)); + return -DER_INVAL; + } + break; + case TCPS_PAUSED: + if (!cr_pool_status_paused(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not paused\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if 
(cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase should not be done\n", + DP_UUID(uuid)); + return -DER_INVAL; + } + break; + case TCPS_PENDING: + if (!cr_pool_status_pending(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not pending\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase should not be done\n", + DP_UUID(uuid)); + return -DER_INVAL; + } + break; + case TCPS_STOPPED: + if (!cr_pool_status_stopped(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not stopped\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase should not be done\n", + DP_UUID(uuid)); + return -DER_INVAL; + } + break; + case TCPS_IMPLICATED: + if (!cr_pool_status_implicated(dcpi->dcpi_status)) { + print_message("CR pool " DF_UUID " status %s is not implicated\n", + DP_UUID(uuid), dcpi->dcpi_status); + return -DER_INVAL; + } + if (cr_pool_phase_is_done(dcpi->dcpi_phase)) { + print_message("CR pool " DF_UUID " phase should not be done\n", + DP_UUID(uuid)); + return -DER_INVAL; + } + break; + default: + print_message("CR: invalid expected pool status %d\n", exp_status); + break; + } + +#ifdef CR_ACCURATE_QUERY_RESULT + if (dci->dci_report_nr != inconsistency_nr) { + print_message("CR pool " DF_UUID " has unexpected reports: %d vs %d\n", + DP_UUID(uuid), dci->dci_report_nr, inconsistency_nr); + return -DER_INVAL; + } +#endif + + for (i = 0, j = 0; i < dci->dci_report_nr && j < inconsistency_nr; i++) { + dcri = &dci->dci_reports[i]; + if (uuid_compare(dcri->dcri_uuid, uuid) != 0) { +#ifdef CR_ACCURATE_QUERY_RESULT + print_message("Detect unrelated inconsistency report: " + DF_UUID " vs " DF_UUID "\n", + DP_UUID(dcpi->dcpi_uuid), DP_UUID(uuid)); + return -DER_INVAL; +#else + continue; +#endif + } + + if 
(dcri->dcri_class != classes[j]) { + print_message("CR pool " DF_UUID " reports unexpected inconsistency at " + "%d/%d: %u vs %u\n", + DP_UUID(uuid), i, j, dcri->dcri_class, classes[j]); + return -DER_INVAL; + } + + if (dcri->dcri_act != actions[j]) { + print_message("CR pool " DF_UUID " reports unexpected solution at %d/%d: " + "%u vs %u\n", + DP_UUID(uuid), i, j, dcri->dcri_act, actions[j]); + return -DER_INVAL; + } + + if (exp_results != NULL) + result = exp_results[j]; + else + result = 0; + + if (dcri->dcri_result != result) { + print_message("CR pool " DF_UUID " unexpected result at %d/%d: %d vs %d\n", + DP_UUID(uuid), i, j, dcri->dcri_result, result); + return -DER_INVAL; + } + + j++; + } + + if (j != inconsistency_nr) { + print_message("CR pool " DF_UUID " miss some inconsistency reports: %d vs %d\n", + DP_UUID(uuid), j, inconsistency_nr); + return -DER_INVAL; + } + + return 0; +} + +static int +cr_pool_create(void **state, struct test_pool *pool, bool connect, uint32_t fault) +{ + test_arg_t *arg = *state; + char *ptr; + int rc; + + pool->pool_size = CR_POOL_SIZE; + print_message("CR: creating pool ...\n"); + rc = test_setup_pool_create(state, NULL, pool, NULL); + if (rc != 0) { + print_message("CR: pool creation failed: "DF_RC"\n", DP_RC(rc)); + return rc; + } + + print_message("CR: getting label for pool " DF_UUID "\n", DP_UUID(pool->pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, NULL, pool->pool_uuid, "label", &pool->label); + if (rc != 0) { + print_message("CR: pool " DF_UUID " get label failed: "DF_RC"\n", + DP_UUID(pool->pool_uuid), DP_RC(rc)); + return rc; + } + + if (connect) { + print_message("CR: connecting pool " DF_UUID "\n", DP_UUID(pool->pool_uuid)); + rc = daos_pool_connect(pool->pool_str, arg->group, DAOS_PC_RW, &pool->poh, NULL, + NULL); + if (rc != 0) { + print_message("CR: pool " DF_UUID " connect failed: "DF_RC"\n", + DP_UUID(pool->pool_uuid), DP_RC(rc)); + return rc; + } + + if (arg->srv_ntgts == 0) { + daos_pool_info_t info = 
{0}; + + rc = daos_pool_query(pool->poh, NULL, &info, NULL, NULL); + if (rc != 0) { + print_message("CR: pool " DF_UUID " query failed: "DF_RC"\n", + DP_UUID(pool->pool_uuid), DP_RC(rc)); + return rc; + } + + arg->srv_ntgts = info.pi_ntargets; + arg->srv_nnodes = info.pi_nnodes; + arg->srv_disabled_ntgts = info.pi_ndisabled; + } + } + + switch (fault) { + case TCC_NONE: + break; + case TCC_POOL_NONEXIST_ON_MS: + rc = cr_fault_inject(pool->pool_uuid, true, "CIC_POOL_NONEXIST_ON_MS"); + break; + case TCC_POOL_NONEXIST_ON_ENGINE: + rc = cr_fault_inject(pool->pool_uuid, false, "CIC_POOL_NONEXIST_ON_ENGINE"); + break; + case TCC_POOL_BAD_LABEL: + rc = cr_fault_inject(pool->pool_uuid, true, "CIC_POOL_BAD_LABEL"); + if (rc == 0) { + rc = strlen(pool->label); + D_REALLOC(ptr, pool->label, rc, rc + 7); + if (ptr == NULL) { + print_message("CR: pool " DF_UUID " refresh label failed\n", + DP_UUID(pool->pool_uuid)); + rc = -DER_NOMEM; + } else { + strcat(ptr, "-fault"); + pool->label = ptr; + rc = 0; + } + } + break; + default: + print_message("CR: invalid type %d for pool " DF_UUID " fault injection\n", fault, + DP_UUID(pool->pool_uuid)); + rc = -DER_INVAL; + break; + } + + return rc; +} + +static int +cr_pool_create_with_svc(void **state, struct test_pool *pool, bool connect, uint32_t fault) +{ + pool->svc = d_rank_list_alloc(1); + if (pool->svc == NULL) { + print_message("CR: failed to create svc list for create pool\n"); + return -DER_NOMEM; + } + + return cr_pool_create(state, pool, connect, fault); +} + +static int +cr_cont_create(void **state, struct test_pool *pool, struct test_cont *cont, int fault) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + test_arg_t *arg = *state; + daos_prop_t *prop = NULL; + daos_handle_t coh; + int fd; + int rc; + int rc1; + + strncpy(cont->label, "/tmp/cr_cont_XXXXXX", sizeof(cont->label) - 1); + fd = mkstemp(cont->label); + if (fd < 0) { + print_message("CR: cont generate label failed: %s\n", strerror(errno)); + return d_errno2der(errno); + } + 
+ close(fd); + unlink(cont->label); + + /* Move cr_cont_XXXXXX (including the terminated '\0') ahead to overwrite '/tmp/' */ + memmove(cont->label, &cont->label[5], strlen(cont->label) - 4); + print_message("CR: creating container ...\n"); + if (fault >= 0) + rc = daos_cont_create_with_label(pool->poh, cont->label, NULL, &cont->uuid, NULL); + else + rc = daos_cont_create(pool->poh, &cont->uuid, NULL, NULL); + if (rc != 0) { + print_message("CR: cont creation failed: "DF_RC"\n", DP_RC(rc)); + return rc; + } + + if (fault != 0) { + print_message("CR: opening container " DF_UUID " ...\n", DP_UUID(cont->uuid)); + if (fault < 0) { + uuid_unparse_lower(cont->uuid, uuid_str); + rc = daos_cont_open(pool->poh, uuid_str, DAOS_COO_RW, &coh, NULL, NULL); + } else { + rc = daos_cont_open(pool->poh, cont->label, DAOS_COO_RW, &coh, NULL, NULL); + } + if (rc != 0) { + print_message("CR: cont " DF_UUID " open failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc)); + return rc; + } + + /* Inject fail_loc to generate inconsistent container label. */ + rc = cr_debug_set_params(arg, DAOS_CHK_CONT_BAD_LABEL | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + prop = daos_prop_alloc(1); + assert_non_null(prop); + + /* cont->label is large enough to hold the new label. 
*/ + D_ASSERT(sizeof(cont->label) > strlen(cont->label) + 7); + strcat(cont->label, "-fault"); + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_LABEL; + D_STRNDUP(prop->dpp_entries[0].dpe_str, cont->label, strlen(cont->label)); + + print_message("CR: set label for container " DF_UUID "\n", DP_UUID(cont->uuid)); + rc = daos_cont_set_prop(coh, prop, NULL); + if (rc != 0) + print_message("CR: cont " DF_UUID " set label failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc)); + + daos_prop_free(prop); + cr_debug_set_params(arg, 0); + + print_message("CR: closing container " DF_UUID " ...\n", DP_UUID(cont->uuid)); + rc1 = daos_cont_close(coh, NULL); + if (rc1 != 0) { + print_message("CR: cont " DF_UUID " close failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc1)); + if (rc == 0) + rc = rc1; + } + } + + return rc; +} + +static int +cr_cont_get_label(void **state, struct test_pool *pool, struct test_cont *cont, bool connect, + char **label) +{ + char uuid_str[DAOS_UUID_STR_SIZE]; + test_arg_t *arg = *state; + daos_prop_t *prop = NULL; + daos_handle_t coh; + int rc; + int rc1; + + if (connect) { + print_message("CR: connecting pool " DF_UUID "\n", DP_UUID(pool->pool_uuid)); + rc = daos_pool_connect(pool->pool_str, arg->group, DAOS_PC_RW, &pool->poh, NULL, + NULL); + if (rc != 0) { + print_message("CR: pool " DF_UUID " connect failed: "DF_RC"\n", + DP_UUID(pool->pool_uuid), DP_RC(rc)); + return rc; + } + } + + print_message("CR: opening container " DF_UUID " ...\n", DP_UUID(cont->uuid)); + uuid_unparse_lower(cont->uuid, uuid_str); + rc = daos_cont_open(pool->poh, uuid_str, DAOS_COO_RW, &coh, NULL, NULL); + if (rc != 0) { + print_message("CR: cont " DF_UUID " open failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc)); + return rc; + } + + prop = daos_prop_alloc(1); + assert_non_null(prop); + + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_LABEL; + print_message("CR: getting label for container " DF_UUID "\n", DP_UUID(cont->uuid)); + + rc = daos_cont_query(coh, NULL, prop, NULL); 
+ if (rc != 0) + print_message("CR: cont " DF_UUID " set label failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc)); + else + D_STRNDUP(*label, prop->dpp_entries[0].dpe_str, + strlen(prop->dpp_entries[0].dpe_str)); + + daos_prop_free(prop); + + print_message("CR: closing container " DF_UUID " ...\n", DP_UUID(cont->uuid)); + rc1 = daos_cont_close(coh, NULL); + if (rc1 != 0) { + print_message("CR: cont " DF_UUID " close failed: "DF_RC"\n", + DP_UUID(cont->uuid), DP_RC(rc1)); + if (rc == 0) + rc = rc1; + } + + /* + * Do not disconnect the pool that may be reused by subsequent operation. cr_cleanup() will + * handle that finally. + */ + + return rc; +} + +/* Test Cases. */ + +/* + * 1. Create pool1, pool2 and pool3. + * 2. Fault injection to generate inconsistent pool label for all of them. + * 3. Start checker on pool1 and pool2. + * 4. Query checker, pool1 and pool2 should have been repaired, pool3 should not be repaired. + * 5. Switch to normal mode and verify the labels. + * 6. Cleanup. 
+ */ +static void +cr_start_specified(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[3] = { 0 }; + uuid_t uuids[3] = { 0 }; + struct daos_check_info dcis[3] = { 0 }; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_TRUST_MS; + int rc; + int i; + + print_message("CR1: start checker for specified pools\n"); + + for (i = 0; i < 3; i++) { + rc = cr_pool_create(state, &pools[i], false, class); + assert_rc_equal(rc, 0); + + uuid_copy(uuids[i], pools[i].pool_uuid); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 2, uuids, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &uuids[0], &dcis[0]); + + for (i = 1; i < 3; i++) { + rc = cr_check_query(1, &uuids[i], &dcis[i]); + assert_rc_equal(rc, 0); + } + + for (i = 0; i < 3; i++) { + rc = cr_ins_verify(&dcis[i], TCIS_COMPLETED); + assert_rc_equal(rc, 0); + } + + for (i = 0; i < 2; i++) { + rc = cr_pool_verify(&dcis[i], uuids[i], TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_pool_verify(&dcis[2], uuids[2], TCPS_UNCHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 3; i++) { + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pools[i].pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pools[i].label, pools[i].pool_uuid, "label", + &label); + assert_rc_equal(rc, 0); + + if (i < 2) + D_ASSERTF(strcmp(label, pools[i].label) == 0, + "Pool (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(pools[i].pool_uuid), label, pools[i].label); + else + D_ASSERTF(strcmp(label, pools[i].label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(pools[i].pool_uuid), label); + + D_FREE(label); + cr_dci_fini(&dcis[i]); + } + + cr_cleanup(arg, 
pools, 3); +} + +/* + * 1. Create pool. + * 2. Fault injection to make pool as orphan. + * 3. Start checker with POOL_NONEXIST_ON_MS:CIA_INTERACT. + * 4. Query checker, should show interaction. + * 5. Check repair with re-add the orphan pool. + * 6. Query checker, orphan pool should have been repaired. + * 7. Switch to normal mode and verify the pool. + * 8. Cleanup. + */ +static void +cr_leader_interaction(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + daos_mgmt_pool_info_t mgmt_pool = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action; + daos_size_t pool_nr = 1; + int rc; + int i; + + print_message("CR2: check leader side interaction\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_READD; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + rc = dmg_pool_list(dmg_config_file, 
arg->group, &pool_nr, &mgmt_pool); + assert_rc_equal(rc, 0); + + assert_rc_equal(pool_nr, 1); + D_ASSERTF(uuid_compare(pool.pool_uuid, mgmt_pool.mgpi_uuid) == 0, + "Unmatched pool UUID: " DF_UUID " vs " DF_UUID "\n", + DP_UUID(pool.pool_uuid), DP_UUID(mgmt_pool.mgpi_uuid)); + + cr_dci_fini(&dci); + clean_pool_info(1, &mgmt_pool); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool and container. + * 2. Fault injection to make container label inconsistent. + * 3. Start checker with CONT_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show interaction. + * 5. Check repair the container label with trust PS (pool/container service). + * 6. Query checker, container label should have been repaired. + * 7. Switch to normal mode and verify the container label. + * 8. Cleanup. + */ +static void +cr_engine_interaction(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct test_cont cont = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + char *label = NULL; + uint32_t class = TCC_CONT_BAD_LABEL; + uint32_t action; + int rc; + int i; + + print_message("CR3: check engine side interaction\n"); + + rc = cr_pool_create(state, &pool, true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pool, &cont, 1); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_TRUST_PS; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = 
cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. */ + rc = cr_cont_get_label(state, &pool, &cont, true, &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, cont.label) == 0, + "Cont (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(cont.uuid), label, cont.label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool1 and pool2. + * 2. Fault injection to make inconsistent label for both of them. + * 3. Start checker on pool1 and pool2 with POOL_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show interaction. + * 5. Check repair pool1's label with trust PS (trust MS is the default) and "for-all" option. + * 6. Query checker, should be completed, both pool1 and pool2 label should have been repaired. + * 7. Switch to normal mode and verify pools' labels. + * 8. Cleanup. 
+ */ +static void +cr_repair_forall_leader(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + char *ps_label = NULL; + char *ptr; + char ms_label[DAOS_PROP_LABEL_MAX_LEN]; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action; + int rc; + int i; + + print_message("CR4: check repair option - for-all, on leader\n"); + + for (i = 0; i < 2; i++) { + rc = cr_pool_create(state, &pools[i], false, class); + assert_rc_equal(rc, 0); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pools[0].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pools[0].pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pools[0].pool_uuid); + action = TCA_TRUST_PS; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, true); + break; + } + } + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + cr_ins_wait(1, &pools[i].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_CHECKED, 1, &class, &action, + NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + /* The last 6 characters of pools[i].label is '-fault'. 
*/ + ptr = strrchr(pools[i].label, '-'); + assert_non_null(ptr); + + memcpy(ms_label, pools[i].label, ptr - pools[i].label); + ms_label[ptr - pools[i].label] = '\0'; + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pools[i].pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, ms_label, pools[i].pool_uuid, "label", + &ps_label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(ps_label, ms_label) == 0, + "Pool (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(pools[i].pool_uuid), ps_label, ms_label); + D_FREE(ps_label); + } + + cr_dci_fini(&dci); + cr_cleanup(arg, pools, 2); +} + +/* + * 1. Create pool1 and pool2. Create container under both of them. + * 2. Fault injection to make inconsistent container label for both of them. + * 3. Start checker on pool1 and pool2 with CONT_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show interaction. + * 5. Check repair pool1/cont's label with trust target (trust PS/CS is the default) and "for-all". + * 6. Query checker, should be completed, both containers' label should have been repaired. + * 7. Switch to normal mode and verify containers' labels. + * 8. Cleanup. 
+ */ +static void +cr_repair_forall_engine(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = { 0 }; + struct test_cont conts[2] = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + char *target_label = NULL; + char *ptr; + char ps_label[DAOS_PROP_LABEL_MAX_LEN]; + uint32_t class = TCC_CONT_BAD_LABEL; + uint32_t action; + int rc; + int i; + + print_message("CR5: check repair option - for-all, on engine\n"); + + for (i = 0; i < 2; i++) { + rc = cr_pool_create(state, &pools[i], true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pools[i], &conts[i], 1); + assert_rc_equal(rc, 0); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pools[0].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pools[0].pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pools[0].pool_uuid); + action = TCA_TRUST_TARGET; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, true); + break; + } + } + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + cr_ins_wait(1, &pools[i].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_CHECKED, 1, &class, &action, + NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + /* The last 6 characters of conts[i].label is '-fault'. 
*/ + ptr = strrchr(conts[i].label, '-'); + assert_non_null(ptr); + + memcpy(ps_label, conts[i].label, ptr - conts[i].label); + ps_label[ptr - conts[i].label] = '\0'; + + rc = cr_cont_get_label(state, &pools[i], &conts[i], true, &target_label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(target_label, ps_label) == 0, + "Cont (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(conts[i].uuid), target_label, ps_label); + D_FREE(target_label); + } + + cr_dci_fini(&dci); + cr_cleanup(arg, pools, 2); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Start checker with POOL_BAD_LABEL:CIA_INTERACT. + * 4. Query checker, should show interaction. + * 5. Stop checker. + * 6. Query checker, instance should be stopped. + * 7. Switch to normal mode to verify the pool label that should not be repaired. + * 8. Cleanup. + */ +static void +cr_stop_leader_interaction(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_INTERACT; + int rc; + + print_message("CR6: stop checker with pending check leader interaction\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_STOPPED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_STOPPED, 1, &class, &action, 
NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, pool.label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(pool.pool_uuid), label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool and container. + * 2. Fault injection to make container label inconsistent. + * 3. Start checker with CONT_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show interaction. + * 5. Stop checker. + * 6. Query checker, instance should be stopped. + * 7. Switch to normal mode to verify the container label that should not be repaired. + * 8. Cleanup. + */ +static void +cr_stop_engine_interaction(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct test_cont cont = { 0 }; + struct daos_check_info dci = { 0 }; + char *label = NULL; + uint32_t class = TCC_CONT_BAD_LABEL; + uint32_t action = TCA_INTERACT; + int rc; + + print_message("CR7: stop checker with pending check engine interaction\n"); + + rc = cr_pool_create(state, &pool, true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pool, &cont, 1); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + + 
cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_STOPPED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_STOPPED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. */ + rc = cr_cont_get_label(state, &pool, &cont, true, &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, cont.label) != 0, + "Cont (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(cont.uuid), label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool1, pool2 and pool3. + * 2. Fault injection to generate inconsistent pool label for all of them. + * 3. Start checker on pools with POOL_BAD_LABEL:CIA_INTERACT. + * 4. Query checker, should show interaction. + * 5. Stop checker on pool1 and pool2. + * 6. Query checker, instance should still run, but checking of pool1 and pool2 should be stopped. + * 7. Check repair pool3's label with trust MS. + * 8. Query checker, instance should be completed. + * 9. Switch to normal mode to verify the labels: + * pool1 and pool2 should not be fixed, pool3 should have been fixed. + * 10. Cleanup. 
+ */ +static void +cr_stop_specified(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[3] = { 0 }; + uuid_t uuids[3] = { 0 }; + struct daos_check_info dcis[3] = { 0 }; + struct daos_check_report_info *dcri; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action; + int rc; + int i; + + print_message("CR8: stop checker for specified pools\n"); + + for (i = 0; i < 3; i++) { + rc = cr_pool_create(state, &pools[i], false, class); + assert_rc_equal(rc, 0); + + uuid_copy(uuids[i], pools[i].pool_uuid); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + for (i = 0; i < 3; i++) { + cr_pool_wait(1, &uuids[i], &dcis[i]); + + rc = cr_ins_verify(&dcis[i], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[i], uuids[i], TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_check_stop(2, uuids); + assert_rc_equal(rc, 0); + + for (i = 0; i < 3; i++) { + cr_dci_fini(&dcis[i]); + rc = cr_check_query(1, &uuids[i], &dcis[i]); + assert_rc_equal(rc, 0); + } + + for (i = 0; i < 2; i++) { + rc = cr_ins_verify(&dcis[i], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[i], uuids[i], TCPS_STOPPED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_ins_verify(&dcis[2], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[2], uuids[2], TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dcis[2], NULL, uuids[2]); + action = TCA_TRUST_MS; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &uuids[2], &dcis[2]); + + rc = cr_ins_verify(&dcis[2], 
TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[2], uuids[2], TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 3; i++) { + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pools[i].pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pools[i].label, pools[i].pool_uuid, "label", + &label); + assert_rc_equal(rc, 0); + + if (i > 1) + D_ASSERTF(strcmp(label, pools[i].label) == 0, + "Pool (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(pools[i].pool_uuid), label, pools[i].label); + else + D_ASSERTF(strcmp(label, pools[i].label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(pools[i].pool_uuid), label); + + D_FREE(label); + cr_dci_fini(&dcis[i]); + } + + cr_cleanup(arg, pools, 3); +} + +/* + * 1. Create pool. + * 2. Fault injection to make the pool as orphan. + * 3. Start checker with POOL_NONEXIST_ON_MS:CIA_IGNORE + * 4. Query checker, instance should be completed, but orphan pool is ignored. + * 5. Restart checker with specified pool uuid and POOL_NONEXIST_ON_MS:CIA_INTERACT. + * 6. Query checker, that should show interaction for the orphan pool. + * 7. Check repair with ignore the orphan pool. + * 8. Restart checker with POOL_NONEXIST_ON_MS:CIA_DEFAULT but not specify pool uuid. + * 9. Query checker, the orphan pool should have been repaired. + * 10. Switch to normal mode and verify the pool. + * 11. Cleanup. 
+ */ +static void +cr_auto_reset(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + daos_mgmt_pool_info_t mgmt_pool = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action; + daos_size_t pool_nr = 1; + int rc; + int i; + + print_message("CR9: reset checker automatically if former instance completed\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_IGNORE"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + action = TCA_IGNORE; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_NONE, 1, &pool.pool_uuid, "POOL_NONEXIST_ON_MS:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_IGNORE; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_NONE, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_DEFAULT"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, 
&dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + action = TCA_READD; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + rc = dmg_pool_list(dmg_config_file, arg->group, &pool_nr, &mgmt_pool); + assert_rc_equal(rc, 0); + + assert_rc_equal(pool_nr, 1); + D_ASSERTF(uuid_compare(pool.pool_uuid, mgmt_pool.mgpi_uuid) == 0, + "Unmatched pool UUID: " DF_UUID " vs " DF_UUID "\n", + DP_UUID(pool.pool_uuid), DP_UUID(mgmt_pool.mgpi_uuid)); + + cr_dci_fini(&dci); + clean_pool_info(1, &mgmt_pool); + cr_cleanup(arg, &pool, 1); +} + +static void +cr_pause(void **state, bool force) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_INTERACT; + int rc; + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(force); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + /* Sleep for a while after system re-started under check mode. */ + sleep(5); + + cr_dci_fini(&dci); + rc = cr_check_query(1, &pool.pool_uuid, &dci); + assert_rc_equal(rc, 0); + + rc = cr_ins_verify(&dci, TCIS_PAUSED); + assert_rc_equal(rc, 0); + + /* Only show the old repair information. 
*/ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PAUSED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Start checker with "-p POOL_BAD_LABEL:CIA_INTERACT". + * 4. Query checker, it will show the interaction. + * 5. Stop the system, that will pause the check instance. + * 6. Start the system and query the checker, it should show 'pause' status. + * 7. Switch to normal mode and cleanup. + */ +static void +cr_shutdown(void **state) +{ + print_message("CR10: checker shutdown\n"); + + cr_pause(state, false); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Start checker with "-p POOL_BAD_LABEL:CIA_INTERACT". + * 4. Query checker, it will show the interaction. + * 5. Stop the system by force, that will stop the check instance without cleanup. + * 6. Start the system and query the checker, it should show 'pause' status. + * 7. Switch to normal mode and cleanup. + */ +static void +cr_crash(void **state) +{ + print_message("CR11: checker crash\n"); + + cr_pause(state, true); +} + +/* + * 1. Create pool. + * 2. Fault injection to make the pool as orphan. + * 3. Set fail_loc to make check leader to be blocked after CHK__CHECK_SCAN_PHASE__CSP_POOL_LIST. + * 4. Start checker. + * 5. Query checker, it will show that the orphan pool has been repaired. + * 6. Switch to normal mode that will pause the check instance. + * 7. Start the system. + * 8. Fault injection to make the pool as orphan again. + * 9. Start checker again without any option. + * 10. Query checker, it will only show the old repair information, the new orphan inconsistency + * should be skipped. + * 11. Switch to normal mode. + * 12. Verify the pool is still orphan. + * 13. Reset fail_loc and cleanup. 
+ */ +static void +cr_leader_resume(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + daos_mgmt_pool_info_t mgmt_pool = { 0 }; + struct daos_check_info dci = { 0 }; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action = TCA_READD; + daos_size_t pool_nr = 1; + int rc; + + print_message("CR12: check leader resume from former stop/paused phase\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to block pool ult and wait for the pause signal. */ + rc = cr_debug_set_params(arg, DAOS_CHK_LEADER_BLOCK | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + /* Steps 6 and 7: leave check mode while the instance is still running, then start the system. */ + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + /* Step 8: inject the same inconsistency a second time. */ + rc = cr_fault_inject(pool.pool_uuid, true, "CIC_POOL_NONEXIST_ON_MS"); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + /* Step 9: start again with TCSF_NONE, that is without TCSF_RESET. */ + rc = cr_check_start(TCSF_NONE, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + /* Only show the old repair information.
*/ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + rc = dmg_pool_list(dmg_config_file, arg->group, &pool_nr, &mgmt_pool); + assert_rc_equal(rc, 0); + + /* No pool will be found since the pool became orphan again and was not repaired. */ + assert_rc_equal(pool_nr, 0); + + /* The following is for cleanup, including the repair of the orphan pool before destroy. */ + + cr_debug_set_params(arg, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to make check engine to be blocked after CHK__CHECK_SCAN_PHASE__CSP_POOL_CLEANUP. + * 4. Start checker with option "-p POOL_BAD_LABEL:CIA_TRUST_PS". + * 5. Query checker, it will show that the inconsistent pool label has been repaired. + * 6. Switch to normal mode that will pause the check instance. + * 7. Start the system. + * 8. Fault injection to make the pool label to be inconsistent again. + * 9. Start checker again without any option. + * 10. Query checker, it will only show the old repair information, the new inconsistent pool label + * should be skipped. + * 11. Switch to normal mode. + * 12. Verify the pool label is still inconsistent since related phase is skipped. + * 13. Reset fail_loc and cleanup.
+ */ +static void +cr_engine_resume(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_TRUST_PS; + int rc; + + print_message("CR13: check engine resume from former stop/paused phase\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* + * Inject fail_loc to block pool ult and wait for the pause signal. + * NOTE(review): the step list of this test talks about blocking the check engine, while + * the fail_loc set here is DAOS_CHK_LEADER_BLOCK - confirm that this is intended. + */ + rc = cr_debug_set_params(arg, DAOS_CHK_LEADER_BLOCK | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + /* Steps 6 and 7: leave check mode while the instance is still running, then start the system. */ + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + /* Step 8: inject the same inconsistency a second time. */ + rc = cr_fault_inject(pool.pool_uuid, true, "CIC_POOL_BAD_LABEL"); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + /* Step 9: start again with TCSF_NONE, that is without TCSF_RESET. */ + rc = cr_check_start(TCSF_NONE, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + /* Only show the old repair information.
*/ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + /* Step 13: reset fail_loc. */ + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + /* Step 12: the label read back must still differ from pool.label. */ + D_ASSERTF(strcmp(label, pool.label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(pool.pool_uuid), label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool1 and pool2. + * 2. Create pool1/cont1, pool2/cont2. + * 3. Fault injection to generate inconsistent label for both pool1 and pool2. + * 4. Fault injection to generate inconsistent label for both cont1 and cont2. + * 5. Start checker with "POOL_BAD_LABEL:CIA_IGNORE,CONT_BAD_LABEL:CIA_INTERACT". + * 6. Query checker, should show interaction for cont1's label and cont2's label. + * 7. Stop checker. + * 8. Restart checker on pool1 with "POOL_BAD_LABEL:CIA_INTERACT" and 'reset' option. + * 9. Query checker, should show interaction for pool1's label, pool2 should be in stopped status. + * 10. Stop checker. + * 11. Query checker, instance should be stopped. + * 12. Restart checker on pool2 with "POOL_BAD_LABEL:CIA_INTERACT,CONT_BAD_LABEL:CIA_INTERACT". + * 13. Query checker, should show interaction for cont2's label. + * 14. Stop checker and switch to normal mode. + * 15. Cleanup.
+ */ +static void +cr_reset_specified(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = { 0 }; + struct test_cont conts[2] = { 0 }; + struct daos_check_info dcis[2] = { 0 }; + uint32_t classes[3]; + uint32_t actions[3]; + int rc; + int i; + + print_message("CR14: reset checker for specified pools\n"); + + /* + * The classes are sorted in order, otherwise the subsequent + * cr_pool_verify with multiple inconsistencies will hit trouble. + */ + classes[0] = TCC_POOL_BAD_LABEL; + classes[1] = TCC_CONT_BAD_LABEL; + classes[2] = TCC_CONT_BAD_LABEL; + actions[0] = TCA_IGNORE; + actions[1] = TCA_INTERACT; + actions[2] = TCA_INTERACT; + + for (i = 0; i < 2; i++) { + rc = cr_pool_create(state, &pools[i], true, classes[0]); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pools[i], &conts[i], 1); + assert_rc_equal(rc, 0); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, + "POOL_BAD_LABEL:CIA_IGNORE,CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + cr_pool_wait(1, &pools[i].pool_uuid, &dcis[i]); + + rc = cr_ins_verify(&dcis[i], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[i], pools[i].pool_uuid, TCPS_PENDING, 2, classes, actions, + NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + /* Step 8: restart with TCSF_RESET, naming only the first pool. */ + rc = cr_check_start(TCSF_RESET, 1, &pools[0].pool_uuid, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pools[0].pool_uuid, &dcis[0]); + + rc = cr_ins_verify(&dcis[0], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + /* Pool1's report is for pool label interaction.
*/ + rc = cr_pool_verify(&dcis[0], pools[0].pool_uuid, TCPS_PENDING, 1, &classes[0], &actions[1], + NULL); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dcis[1]); + rc = cr_check_query(1, &pools[1].pool_uuid, &dcis[1]); + assert_rc_equal(rc, 0); + + /* Pool2's (old) report should be still there. */ + rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_STOPPED, 2, classes, actions, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + /* Step 12: start on the second pool with TCSF_NONE, that is without TCSF_RESET. */ + rc = cr_check_start(TCSF_NONE, 1, &pools[1].pool_uuid, + "POOL_BAD_LABEL:CIA_INTERACT,CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pools[1].pool_uuid, &dcis[1]); + + rc = cr_ins_verify(&dcis[1], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + /* There are 3 reports for pool2: two are old (since not reset), another one is new. */ + rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_PENDING, 3, classes, actions, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) + cr_dci_fini(&dcis[i]); + cr_cleanup(arg, pools, 2); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to fail pool label update. + * 4. Start checker with option "--failout=on" and "POOL_BAD_LABEL:CIA_TRUST_PS". + * 5. Query checker, instance should be failed, pool should be "failed". + * 6. Restart checker with option "--reset --failout=off" and "POOL_BAD_LABEL:CIA_TRUST_PS". + * 7. Query checker, pool should be "checked" with failed inconsistency repair report. + * 8. Reset fail_loc. + * 9. Switch to normal mode to verify the pool label. + * 10. Cleanup.
+ */ +static void +cr_failout(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_TRUST_PS; + int result = -DER_IO; + int rc; + + print_message("CR15: check start option - failout\n"); + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to fail pool label repair. */ + rc = cr_debug_set_params(arg, DAOS_CHK_LEADER_FAIL_REGPOOL | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_FAILED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 1, &class, &action, &result); + assert_rc_equal(rc, 0); + /* Step 6: restart with failout disabled; the fail_loc is still set at this point. */ + rc = cr_check_start(TCSF_RESET | TCSF_NO_FAILOUT, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, &result); + assert_rc_equal(rc, 0); + /* Step 8: reset fail_loc. */ + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + /* Both repair attempts ran with the fail_loc set, so the label must still differ. */ + D_ASSERTF(strcmp(label, pool.label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", + DP_UUID(pool.pool_uuid), label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool and cont.
+ * 2. Fault injection to generate empty label for the container property. + * 3. Start checker with option "--auto=on -p CONT_BAD_LABEL:CIA_TRUST_TARGET". + * 4. For bad container label, if the trusted label is empty, then need interaction by default, + * but under auto mode, it will be ignored. + * 5. Query checker, should be completed, inconsistent container label should be "ignored". + * 6. Restart checker with option "--reset --auto=off" and "-p CONT_BAD_LABEL:CIA_TRUST_TARGET". + * 7. Query checker, it will show the interaction for the inconsistent container label. + * 8. Switch to normal mode and cleanup. + */ +static void +cr_auto_repair(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct test_cont cont = { 0 }; + struct daos_check_info dci = { 0 }; + uint32_t class = TCC_CONT_BAD_LABEL; + uint32_t action; + int rc; + + print_message("CR16: check start option - auto repair\n"); + + rc = cr_pool_create(state, &pool, true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pool, &cont, -1); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + /* Step 3: first run with TCSF_AUTO. */ + rc = cr_check_start(TCSF_AUTO | TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_TRUST_TARGET"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + action = TCA_IGNORE; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + /* Step 6: second run with TCSF_NO_AUTO, the same policy now ends in an interaction. */ + rc = cr_check_start(TCSF_RESET | TCSF_NO_AUTO, 0, NULL, "CONT_BAD_LABEL:CIA_TRUST_TARGET"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); +
assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool1 and pool2. + * 2. Fault injection to make pool2 as orphan. + * 3. Start checker on pool1 without any option. + * 4. Query checker, no inconsistency should be reported. + * 5. Restart checker on pool1 with option "-O". + * 6. Query checker, it should find out the orphan pool2 and repair it. + * 7. Switch to normal mode to verify the pools. + * 8. Cleanup. + */ +static void +cr_orphan_pool(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = { 0 }; + daos_mgmt_pool_info_t mgmt_pools[2] = { 0 }; + struct daos_check_info dci = { 0 }; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action = TCA_READD; + daos_size_t pool_nr = 2; + int rc; + + print_message("CR17: check start option - scan orphan pools by force\n"); + + rc = cr_pool_create(state, &pools[0], false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_pool_create(state, &pools[1], false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 1, &pools[0].pool_uuid, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pools[0].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pools[0].pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pools[1].pool_uuid, &dci); + + rc = cr_pool_verify(&dci, pools[1].pool_uuid, TCPS_UNCHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + /* Step 5: same pool list as before, this time with TCSF_ORPHAN. */ + rc = cr_check_start(TCSF_ORPHAN, 1, &pools[0].pool_uuid, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pools[1].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pools[1].pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); +
assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + rc = dmg_pool_list(dmg_config_file, arg->group, &pool_nr, mgmt_pools); + assert_rc_equal(rc, 0); + + assert_rc_equal(pool_nr, 2); + + cr_dci_fini(&dci); + clean_pool_info(2, mgmt_pools); + cr_cleanup(arg, pools, 2); +} +/* + * Common body of the CR18 and CR19 tests. + * + * The fail_loc DAOS_CHK_PS_NOTIFY_LEADER (leader is true) or DAOS_CHK_PS_NOTIFY_ENGINE (leader + * is false) is set before the checker is started. The test then verifies that the instance stays + * in running status, that another check start is refused with -DER_ALREADY, and that after an + * explicit stop and a fail_loc reset a new check start runs to completion. + */ +static void +cr_fail_ps_sync(void **state, bool leader) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action = TCA_TRUST_PS; + uint32_t fail_loc; + int rc; + + rc = cr_pool_create(state, &pool, false, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + if (leader) + fail_loc = DAOS_CHK_PS_NOTIFY_LEADER; + else + fail_loc = DAOS_CHK_PS_NOTIFY_ENGINE; + + /* Inject fail_loc to skip notification from PS leader to check leader or pool shards. */ + rc = cr_debug_set_params(arg, fail_loc | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS"); + assert_rc_equal(rc, 0); + + /* The pool wait will timeout since failed to notify some check engine/leader when done. */ + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + +#if 0 + /* Disable the check because of DAOS-13989. */ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); +#endif + + /* Start checker should fail since some check leader/engines are still running. */ + rc = cr_check_start(TCSF_NONE, 0, NULL, NULL); + assert_rc_equal(rc, -DER_ALREADY); + + /* The pool wait will timeout. */ + cr_pool_wait(1, &pool.pool_uuid, &dci); + + /* Current running instance should not be affected by above failed check start.
*/ + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + +#if 0 + /* Disable the check because of DAOS-13989. */ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); +#endif + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_STOPPED); + assert_rc_equal(rc, 0); + /* Reset fail_loc before the checker is started again. */ + cr_debug_set_params(arg, 0); + + rc = cr_check_start(TCSF_NONE, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + if (leader) + /* The instance is resumed, so still hold former inconsistency report. */ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + else + /* Instance is reset automatically, old inconsistency report should have been discarded. */ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to simulate PS leader failed to notify status update to check leader. + * 4. Start checker with option "-p POOL_BAD_LABEL:CIA_TRUST_PS". + * 5. Query checker, the instance should be in running with pool label repaired, although all + * engines have completed. + * 6. Restart checker should fail since leader is still running. + * 7. Query checker, the instance should still be in running, not stopped for the failed restart. + * 8. Stop checker. + * 9. Reset fail_loc. + * 10. Restart checker without any option. The leader should resume from stopped point, + * engines will notify the completion. + * 11. Query checker, it should be completed without repeatedly repairing the pool label. + * 12.
Switch to normal mode and cleanup. + */ +static void +cr_fail_sync_leader(void **state) +{ + print_message("CR18: PS leader fails to sync pool status with check leader\n"); + /* true selects the DAOS_CHK_PS_NOTIFY_LEADER variant of the common body. */ + cr_fail_ps_sync(state, true); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to simulate PS leader failed to notify status update to pool shards. + * 4. Start checker with option "-p POOL_BAD_LABEL:CIA_TRUST_PS". + * 5. Query checker, the instance should be in running, although the leader is already completed. + * 6. Restart checker should fail since some engines are still running. + * 7. Query checker, the instance should still be in running, not stopped for the failed restart. + * 8. Stop checker. + * 9. Reset fail_loc. + * 10. Restart checker without any option. The leader instance will reset automatically since former + * leader was completed. Then the engines will be also reset accordingly. + * 11. Query checker, it should be completed without repeatedly repairing the pool label. + * 12. Switch to normal mode and cleanup. + */ +static void +cr_fail_sync_engine(void **state) +{ + print_message("CR19: PS leader fails to sync pool status with check engines\n"); + /* false selects the DAOS_CHK_PS_NOTIFY_ENGINE variant of the common body. */ + cr_fail_ps_sync(state, false); +} + +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Start checker with option "-p POOL_BAD_LABEL:CIA_INTERACT". + * 4. Query checker, it should show the interaction. + * 5. Stop some rank in the system. + * 6. Check repair with trust MS to repair the pool label. + * 7. Query checker, instance should be completed, the pool label should have been repaired. + * 8. Switch to normal mode to verify the pool label. + * 9. Cleanup.
+ */ +static void +cr_engine_death(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + char *label = NULL; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action; + int rank = -1; + int rc; + int i; + + print_message("CR20: check engine death during check\n"); + + rc = cr_pool_create_with_svc(state, &pool, true, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + /* A positive return value ends the test early; only the cleanup below is run. */ + rc = cr_rank_exclude(arg, &pool, &rank); + if (rc > 0) + goto cleanup; + assert_rc_equal(rc, 0); + + print_message("CR: sleep seconds for the rank death event\n"); + sleep(20); + + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_TRUST_MS; + rc = -DER_MISC; + + /* rc stays -DER_MISC when the report does not offer the TCA_TRUST_MS option. */ + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + /* Repair the pool label with the lost rank. */ + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + /* Reint the rank for subsequent test.
*/ + rc = cr_rank_reint(rank, false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + + /* + * The TCA_TRUST_MS repair was verified as done above, so the label read back is expected + * to match pool.label. The tests in which the repair fails or is skipped assert the + * opposite (strcmp != 0). + */ + D_ASSERTF(strcmp(label, pool.label) == 0, + "Pool (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(pool.pool_uuid), label, pool.label); + + D_FREE(label); + +cleanup: + /* dci was filled by cr_pool_wait before the early exit, so finalize it on both paths. */ + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool. + * 2. Fault injection to make the pool as orphan. + * 3. Start checker with option "-p POOL_NONEXIST_ON_MS:CIA_INTERACT". + * 4. Query checker, it should show the interaction. + * 5. Stop some rank in the system. + * 6. Start the rank that is stopped just now - rejoin succeed. + * 7. Query checker, it should still wait for the interaction. + * 8. Check repair with destroying the orphan pool. + * 9. Query checker, instance should be completed, the pool should have been destroyed. + * 10. Restart checker with option "--reset". + * 11. Query checker, it should complete without any inconsistency reported. + * 12. Switch to normal mode and cleanup.
+ */ +static void +cr_engine_rejoin_succ(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action; + int rank = -1; + int rc; + int i; + + print_message("CR21: check engine rejoins check instance successfully\n"); + + rc = cr_pool_create_with_svc(state, &pool, true, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + /* A positive return value ends the test early; only the cleanup below is run. */ + rc = cr_rank_exclude(arg, &pool, &rank); + if (rc > 0) + goto cleanup; + assert_rc_equal(rc, 0); + + /* Reint the rank immediately before the rank death event being detected. */ + rc = cr_rank_reint(rank, true); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + /* Still wait for the interaction.
*/ + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_DISCARD; + rc = -DER_MISC; + + /* rc stays -DER_MISC when the report does not offer the TCA_DISCARD option. */ + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(0, NULL, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + /* Neither pools nor inconsistency reports. */ + D_ASSERTF(dci.dci_pool_nr == 0, "The pool " DF_UUID " was not destroyed completely (%d)\n", + DP_UUID(pool.pool_uuid), dci.dci_pool_nr); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + +cleanup: + /* dci was filled by cr_pool_wait before the early exit, so finalize it on both paths. */ + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool. + * 2. Fault injection to make the pool as orphan. + * 3. Start checker with option "-p POOL_NONEXIST_ON_MS:CIA_INTERACT". + * 4. Query checker, it should show the interaction. + * 5. Stop some rank in the system. + * 6. Check repair with destroying the orphan pool, that should fail since we lost some pool shards + * during the check. + * 7. Query checker, the instance should be completed, the pool should be failed. + * 8. Start the rank that is stopped just now - rejoin failed since the former checker instance has + * already completed. + * 9. Restart checker with option "--reset" and + * "POOL_LESS_SVC_WITHOUT_QUORUM:CIA_DISCARD,POOL_NONEXIST_ON_MS:CIA_DISCARD". + * 10. Query checker, it should complete with the orphan pool destroyed. + * 11. Switch to normal mode and cleanup.
+ */ +static void +cr_engine_rejoin_fail(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + uint32_t class = TCC_POOL_NONEXIST_ON_MS; + uint32_t action; + int rank = -1; + int result; + int rc; + int i; + + print_message("CR22: check engine fails to rejoin check instance\n"); + + rc = cr_pool_create_with_svc(state, &pool, true, class); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); + assert_rc_equal(rc, 0); + + /* A positive return value ends the test early; only the cleanup below is run. */ + rc = cr_rank_exclude(arg, &pool, &rank); + if (rc > 0) + goto cleanup; + assert_rc_equal(rc, 0); + + print_message("CR: sleep seconds for the rank death event\n"); + sleep(20); + + /* Destroy the pool, then related shard will be left on the stopped rank. */ + dcri = cr_locate_dcri(&dci, NULL, pool.pool_uuid); + action = TCA_DISCARD; + rc = -DER_MISC; + + /* rc stays -DER_MISC when the report does not offer the TCA_DISCARD option. */ + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + /* Repair the inconsistency with the lost rank. */ + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + /* The check on the pool will fail as -DER_HG or -DER_TIMEDOUT.
*/ + result = -DER_HG; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 1, &class, &action, &result); + if (rc == -DER_INVAL) { + result = -DER_TIMEDOUT; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 1, &class, &action, &result); + } + assert_rc_equal(rc, 0); + + /* Reint the rank, rejoin will fail but not affect the rank start. */ + rc = cr_rank_reint(rank, true); + assert_rc_equal(rc, 0); + + /* Wait for a while until the control plane to be ready for new check start. */ + cr_pool_wait(1, &pool.pool_uuid, &dci); + + rc = cr_check_start(TCSF_RESET, 0, NULL, + "POOL_LESS_SVC_WITHOUT_QUORUM:CIA_DISCARD,POOL_NONEXIST_ON_MS:CIA_DISCARD"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + /* Some pool shards may have been destroyed, the left ones may have (or not) quorum. */ + class = TCC_POOL_LESS_SVC_WITHOUT_QUORUM; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + if (rc == -DER_INVAL) { + class = TCC_POOL_NONEXIST_ON_MS; + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 1, &class, &action, NULL); + } + assert_rc_equal(rc, 0); + + /* The former excluded rank is not in the check ranks set, stop it explicitly. */ + rc = dmg_system_stop_rank(dmg_config_file, rank, false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + +cleanup: + /* dci was filled by cr_pool_wait before the early exit, so finalize it on both paths. */ + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} +/* + * 1. Create pool1, pool2, pool3 and pool4. Create container under each of them. + * 2. Fault injection to generate inconsistent pool label for pool1 and pool2, inconsistent + * container label for pool3/cont and pool4/cont. + * 3. Set checker policies as all-interactive. + * 4. Start checker on pool1 and pool3. + * 5. Query checker, should show interaction. + * 6. Stop checker on pool1. + * 7.
Start checker on pool2, should fail since former checker is still running for pool3. + * 8. Check repair pool3/cont's label. + * 9. Query checker, it should be completed, pool3/cont's label should have been fixed. + * 10. Restart checker on pool1 (from stopped point) and pool2 (from beginning). + * 11. Query checker, should show interaction. + * 12. Stop checker on all pools. + * 13. Query checker, should show stopped. + * 14. Restart checker without any option, resume former check for pool1 and pool2. + * 15. Check repair all reported inconsistency. + * 16. Query checker, it should be completed. + * 17. Restart checker without any option, it should check all pools. + * 18. Query checker, it should be running, only pool4/cont's bad label needs interaction. + * 19. Check repair pool4/cont's bad label. + * 20. Query checker, it should be completed. + * 21. Switch to normal mode and cleanup. + */ +static void +cr_multiple_pools(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[4] = { 0 }; + struct test_cont conts[4] = { 0 }; + uuid_t uuids[4] = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + uint32_t classes[2]; + uint32_t actions[3]; + int rc; + int i; + int j; + + print_message("CR23: control multiple pools check start/stop sequence\n"); + + classes[0] = TCC_POOL_BAD_LABEL; + classes[1] = TCC_CONT_BAD_LABEL; + actions[0] = TCA_TRUST_MS; + actions[1] = TCA_TRUST_PS; + actions[2] = TCA_INTERACT; + /* The first two pools get the pool label fault; the two branches below differ for i >= 2. */ + for (i = 0; i < 4; i++) { + rc = cr_pool_create(state, &pools[i], true, i < 2 ? classes[0] : TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pools[i], &conts[i], i < 2 ?
0 : 1); + assert_rc_equal(rc, 0); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_set_policy(TCPF_INTERACT, NULL); + assert_rc_equal(rc, 0); + /* Step 4: uuids[] holds pool1 and pool3 for the first check start. */ + uuid_copy(uuids[0], pools[0].pool_uuid); + uuid_copy(uuids[1], pools[2].pool_uuid); + + rc = cr_check_start(TCSF_RESET, 2, uuids, NULL); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &uuids[1], &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, uuids[1], TCPS_PENDING, 1, &classes[1], &actions[2], NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(1, &uuids[0]); + assert_rc_equal(rc, 0); + /* Step 7: a check start on pool2 is refused while the check on pool3 is still running. */ + rc = cr_check_start(TCSF_NONE, 1, &pools[1].pool_uuid, NULL); + assert_rc_equal(rc, -DER_ALREADY); + + dcri = cr_locate_dcri(&dci, NULL, uuids[1]); + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == actions[1]) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &uuids[1], &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, uuids[1], TCPS_CHECKED, 1, &classes[1], &actions[1], NULL); + assert_rc_equal(rc, 0); + /* Step 10: uuids[] now holds pool1 and pool2. */ + uuid_copy(uuids[1], pools[1].pool_uuid); + + rc = cr_check_start(TCSF_NONE, 2, uuids, NULL); + assert_rc_equal(rc, 0); + + cr_pool_wait(1, &uuids[1], &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, uuids[1], TCPS_PENDING, 1, &classes[0], &actions[2], NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_stop(0, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &uuids[1], &dci); + + rc = cr_ins_verify(&dci, TCIS_STOPPED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, uuids[1], TCPS_STOPPED, 1, &classes[0], &actions[2], NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_NONE, 2, uuids, NULL); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2;
i++) { + cr_pool_wait(1, &uuids[i], &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, uuids[i], TCPS_PENDING, 1, &classes[0], &actions[2], NULL); + assert_rc_equal(rc, 0); + + dcri = NULL; + rc = -DER_MISC; + +again: + dcri = cr_locate_dcri(&dci, dcri, uuids[i]); + for (j = 0; j < dcri->dcri_option_nr; j++) { + if (dcri->dcri_options[j] == actions[0]) { + rc = cr_check_repair(dcri->dcri_seq, j, false); + break; + } + } + + /* + * Because of DAOS-13205, the inconsistency report may contain stale information, + * let's try next one. + * + * NOTE(review): the retry dereferences the result of cr_locate_dcri without a + * check and has no exit other than a successful repair - confirm that a report + * accepting the repair is always found for the pool. + */ + if (rc != 0) + goto again; + } + + cr_ins_wait(0, NULL, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + /* Step 17: no pool list is given this time. */ + rc = cr_check_start(TCSF_NONE, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + for (i = 0; i < 4; i++) { + cr_pool_wait(1, &pools[i].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + if (i < 3) + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_CHECKED, 0, NULL, NULL, + NULL); + else + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_PENDING, 1, &classes[1], + &actions[2], NULL); + assert_rc_equal(rc, 0); + } + + dcri = cr_locate_dcri(&dci, NULL, pools[3].pool_uuid); + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == actions[1]) { + rc = cr_check_repair(dcri->dcri_seq, i, false); + break; + } + } + assert_rc_equal(rc, 0); + + cr_ins_wait(0, NULL, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, pools, 4); +} + +/* + * 1. Create pool. + * 2. Set fail_loc to bypass notification about orphan process to check engines. + * 3. Start checker without any option. + * 4. Query checker, it should be completed. + * 5. Switch to normal mode and cleanup.
+ */ +static void +cr_fail_sync_orphan(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + int rc; + + print_message("CR24: check leader failed to notify check engine about orphan process\n"); + + rc = cr_pool_create(state, &pool, false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to bypass notification about orphan process to check engines. */ + rc = cr_debug_set_params(arg, DAOS_CHK_SYNC_ORPHAN_PROCESS | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + /* Check leader may be completed earlier than check engines in this case, double check. */ + cr_ins_wait(0, NULL, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool1 and pool2. + * 2. Fault injection to make inconsistent label for both of them. + * 3. Start checker on pool1 and pool2 with POOL_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show interaction for both pool1 and pool2. + * 5. Check repair pool2's label with trust PS (trust MS is the default) and "for-all" option. + * 6. Query checker, both pool1's and pool2's label should be fixed with trust PS. + * 7. Switch to normal mode and verify pools' labels. + * 8. Cleanup. 
+ */ +static void +cr_inherit_policy(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = { 0 }; + struct daos_check_info dci = { 0 }; + struct daos_check_report_info *dcri; + char *ps_label = NULL; + char *ptr; + char ms_label[DAOS_PROP_LABEL_MAX_LEN]; + uint32_t class = TCC_POOL_BAD_LABEL; + uint32_t action; + int rc; + int i; + + print_message("CR25: inherit check policy from former check repair\n"); + + for (i = 0; i < 2; i++) { + rc = cr_pool_create(state, &pools[i], false, class); + assert_rc_equal(rc, 0); + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + cr_pool_wait(1, &pools[i].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_RUNNING); + assert_rc_equal(rc, 0); + + action = TCA_INTERACT; + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_PENDING, 1, &class, &action, + NULL); + assert_rc_equal(rc, 0); + } + + dcri = cr_locate_dcri(&dci, NULL, pools[1].pool_uuid); + action = TCA_TRUST_PS; + rc = -DER_MISC; + + for (i = 0; i < dcri->dcri_option_nr; i++) { + if (dcri->dcri_options[i] == action) { + rc = cr_check_repair(dcri->dcri_seq, i, true); + break; + } + } + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + cr_ins_wait(1, &pools[i].pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pools[i].pool_uuid, TCPS_CHECKED, 1, &class, &action, + NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + /* The last 6 characters of pools[i].label is '-fault'. 
*/ + ptr = strrchr(pools[i].label, '-'); + assert_non_null(ptr); + + memcpy(ms_label, pools[i].label, ptr - pools[i].label); + ms_label[ptr - pools[i].label] = '\0'; + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pools[i].pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, ms_label, pools[i].pool_uuid, "label", + &ps_label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(ps_label, ms_label) == 0, + "Pool (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(pools[i].pool_uuid), ps_label, ms_label); + D_FREE(ps_label); + } + + cr_dci_fini(&dci); + cr_cleanup(arg, pools, 2); +} + +/* + * 1. Create pool without inconsistency. + * 2. Set fail_loc to simulate some engine failed to report pool shard when start checker. + * 3. Start checker without options. + * 4. Query checker, it should be completed, but the check for the pool should be failed. + * 5. Switch to normal mode and cleanup. + */ +static void +cr_handle_fail_pool1(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + int rc; + + print_message("CR26: skip the pool if some engine failed to report some pool shard\n"); + + rc = cr_pool_create(state, &pool, false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_debug_set_params(arg, DAOS_CHK_FAIL_REPORT_POOL1 | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); 
+} + +/* + * 1. Create pool without inconsistency. + * 2. Set fail_loc to simulate some engine failed to report pool shard when start checker. + * 3. Start checker without options. + * 4. Query checker, it should be completed, but the check for the pool maybe failed, + * depends on PS replicas count. + * 5. Switch to normal mode and cleanup. + */ +static void +cr_handle_fail_pool2(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = { 0 }; + struct daos_check_info dci = { 0 }; + daos_mgmt_pool_info_t mgmt_pool = { 0 }; + daos_size_t pool_nr = 1; + uint32_t class; + uint32_t action; + uint32_t count; + int rc; + + print_message("CR27: handle the pool if some engine failed to report some pool service\n"); + + rc = cr_pool_create(state, &pool, false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = dmg_pool_list(dmg_config_file, arg->group, &pool_nr, &mgmt_pool); + assert_rc_equal(rc, 0); + + assert_rc_equal(pool_nr, 1); + + if (mgmt_pool.mgpi_svc->rl_nr == 1) { + count = 1; + class = TCC_POOL_LESS_SVC_WITHOUT_QUORUM; + action = TCA_DISCARD; + } else if (mgmt_pool.mgpi_svc->rl_nr == 2) { + count = 1; + class = TCC_POOL_LESS_SVC_WITHOUT_QUORUM; + action = TCA_TRUST_PS; + } else { + count = 0; + class = TCC_NONE; + action = TCA_DEFAULT; + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_debug_set_params(arg, DAOS_CHK_FAIL_REPORT_POOL2 | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, + mgmt_pool.mgpi_svc->rl_ranks[0], 0, NULL); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, count, &class, &action, NULL); + assert_rc_equal(rc, 0); + + rc = daos_debug_set_params(arg->group, -1, 
DMG_KEY_FAIL_VALUE, 0, 0, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + clean_pool_info(1, &mgmt_pool); + cr_cleanup(arg, &pool, 1); +} + +static const struct CMUnitTest cr_tests[] = { + { "CR1: start checker for specified pools", + cr_start_specified, async_disable, test_case_teardown}, + { "CR2: check leader side interaction", + cr_leader_interaction, async_disable, test_case_teardown}, + { "CR3: check engine side interaction", + cr_engine_interaction, async_disable, test_case_teardown}, + { "CR4: check repair option - for-all, on leader", + cr_repair_forall_leader, async_disable, test_case_teardown}, + { "CR5: check repair option - for-all, on engine", + cr_repair_forall_engine, async_disable, test_case_teardown}, + { "CR6: stop checker with pending check leader interaction", + cr_stop_leader_interaction, async_disable, test_case_teardown}, + { "CR7: stop checker with pending check engine interaction", + cr_stop_engine_interaction, async_disable, test_case_teardown}, + { "CR8: stop checker for specified pools", + cr_stop_specified, async_disable, test_case_teardown}, + { "CR9: reset checker automatically if former instance completed", + cr_auto_reset, async_disable, test_case_teardown}, + { "CR10: checker shutdown", + cr_shutdown, async_disable, test_case_teardown}, + { "CR11: checker crash", + cr_crash, async_disable, test_case_teardown}, + { "CR12: check leader resume from former stop/paused phase", + cr_leader_resume, async_disable, test_case_teardown}, + { "CR13: check engine resume from former stop/paused phase", + cr_engine_resume, async_disable, test_case_teardown}, + { "CR14: reset checker for specified pools", + cr_reset_specified, async_disable, test_case_teardown}, + { "CR15: check start option - failout", + cr_failout, async_disable, test_case_teardown}, + { "CR16: check start option - auto 
repair", + cr_auto_repair, async_disable, test_case_teardown}, + { "CR17: check start option - scan orphan pools by force", + cr_orphan_pool, async_disable, test_case_teardown}, + { "CR18: PS leader fails to sync pool status with check leader", + cr_fail_sync_leader, async_disable, test_case_teardown}, + { "CR19: PS leader fails to sync pool status with check engines", + cr_fail_sync_engine, async_disable, test_case_teardown}, + { "CR20: check engine death during check", + cr_engine_death, async_disable, test_case_teardown}, + { "CR21: check engine rejoins check instance successfully", + cr_engine_rejoin_succ, async_disable, test_case_teardown}, + { "CR22: check engine fails to rejoin check instance", + cr_engine_rejoin_fail, async_disable, test_case_teardown}, + { "CR23: control multiple pools check start/stop sequence", + cr_multiple_pools, async_disable, test_case_teardown}, + { "CR24: check leader failed to notify check engine about orphan process", + cr_fail_sync_orphan, async_disable, test_case_teardown}, + { "CR25: inherit check policy from former check repair", + cr_inherit_policy, async_disable, test_case_teardown}, + { "CR26: skip the pool if some engine failed to report some pool shard", + cr_handle_fail_pool1, async_disable, test_case_teardown}, + { "CR27: handle the pool if some engine failed to report some pool service", + cr_handle_fail_pool2, async_disable, test_case_teardown}, +}; + +static int +cr_setup(void **state) +{ + return test_setup(state, SETUP_EQ, false, SMALL_POOL_SIZE, 0, NULL); +} + +int +run_daos_cr_test(int rank, int size, int *sub_tests, int sub_tests_size) +{ + int rc = 0; + + if (rank == 0) { + if (sub_tests_size == 0) + rc = cmocka_run_group_tests_name("DAOS_CR", cr_tests, cr_setup, + test_teardown); + else + rc = run_daos_sub_tests("DAOS_CR", cr_tests, ARRAY_SIZE(cr_tests), + sub_tests, sub_tests_size, cr_setup, test_teardown); + } + + par_bcast(PAR_COMM_WORLD, &rc, 1, PAR_INT, 0); + + return rc; +} diff --git 
a/src/tests/suite/daos_mgmt.c b/src/tests/suite/daos_mgmt.c index c165a8162f3..af645532cef 100644 --- a/src/tests/suite/daos_mgmt.c +++ b/src/tests/suite/daos_mgmt.c @@ -162,22 +162,6 @@ setup_manypools(void **state) return setup_pools(state, npools); } -/* zero out uuids, free svc rank lists in pool info returned by DAOS API */ -static void -clean_pool_info(daos_size_t npools, daos_mgmt_pool_info_t *pools) { - int i; - - if (pools) { - for (i = 0; i < npools; i++) { - uuid_clear(pools[i].mgpi_uuid); - if (pools[i].mgpi_svc) { - d_rank_list_free(pools[i].mgpi_svc); - pools[i].mgpi_svc = NULL; - } - } - } -} - /* Search for pool information in pools created in setup (mgmt_lp_args) * Match pool UUID and service replica ranks. * Return matching index or -1 if no match. diff --git a/src/tests/suite/daos_test.c b/src/tests/suite/daos_test.c index 9a5bc0bc8e1..e4e680a72d8 100644 --- a/src/tests/suite/daos_test.c +++ b/src/tests/suite/daos_test.c @@ -17,7 +17,7 @@ * all will be run if no test is specified. Tests will be run in order * so tests that kill nodes must be last. */ -#define TESTS "mpcetTViADKCoRvSXbOzZUdrNbBIPG" +#define TESTS "mFpcetTViADKCoRvSXbOzZUdrNbBIPG" /** * These tests will only be run if explicitly specified. 
They don't get @@ -43,6 +43,7 @@ print_usage(int rank) print_message("\n\nDAOS TESTS\n=============================\n"); print_message("Tests: Use one of these arg(s) for specific test\n"); print_message("daos_test -m|--mgmt\n"); + print_message("daos_test -F|--cat_recov\n"); print_message("daos_test -p|--pool\n"); print_message("daos_test -c|--cont\n"); print_message("daos_test -C|--capa\n"); @@ -107,6 +108,12 @@ run_specified_tests(const char *tests, int rank, int size, nr_failed = run_daos_mgmt_test(rank, size, sub_tests, sub_tests_size); break; + case 'F': + daos_test_print(rank, "\n\n================="); + daos_test_print(rank, "DAOS catastrophic recovery tests.."); + daos_test_print(rank, "================="); + nr_failed += run_daos_cr_test(rank, size, sub_tests, sub_tests_size); + break; case 'p': daos_test_print(rank, "\n\n================="); daos_test_print(rank, "DAOS pool tests.."); @@ -344,6 +351,7 @@ main(int argc, char **argv) static struct option long_options[] = { {"all", no_argument, NULL, 'a'}, {"mgmt", no_argument, NULL, 'm'}, + {"cat_recov", no_argument, NULL, 'F'}, {"pool", no_argument, NULL, 'p'}, {"cont", no_argument, NULL, 'c'}, {"capa", no_argument, NULL, 'C'}, @@ -400,7 +408,7 @@ main(int argc, char **argv) while ((opt = getopt_long(argc, argv, - "ampcCdtTViIzUZxADKeoROg:n:s:u:E:f:w:W:hrNvbBSXl:GP", + "amFpcCdtTViIzUZxADKeoROg:n:s:u:E:f:w:W:hrNvbBSXl:GP", long_options, &index)) != -1) { if (strchr(all_tests_defined, opt) != NULL) { tests[ntests] = opt; diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 022ac402f18..881c141a48b 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -40,6 +40,7 @@ #include #include #include +#include #if D_HAS_WARNING(4, "-Wframe-larger-than=") #pragma GCC diagnostic ignored "-Wframe-larger-than=" @@ -89,6 +90,7 @@ struct test_pool { * can not be changed. 
*/ d_rank_list_t *svc; + char *label; /* flag of slave that share the pool of other test_arg_t */ bool slave; bool destroyed; @@ -334,6 +336,7 @@ int run_daos_ec_io_test(int rank, int size, int *sub_tests, int sub_tests_size); int run_daos_epoch_io_test(int rank, int size, int *tests, int test_size); int run_daos_obj_array_test(int rank, int size); int run_daos_array_test(int rank, int size, int *sub_tests, int sub_tests_size); +int run_daos_cr_test(int rank, int size, int *sub_tests, int sub_tests_size); int run_daos_kv_test(int rank, int size); int run_daos_epoch_test(int rank, int size); int run_daos_epoch_recovery_test(int rank, int size); @@ -675,6 +678,23 @@ test_rmdir(const char *path, bool force) return rc; } +/* Zero out uuids, free svc rank lists in pool info returned by DAOS API */ +static inline void +clean_pool_info(daos_size_t npools, daos_mgmt_pool_info_t *pools) +{ + int i; + + if (pools) { + for (i = 0; i < npools; i++) { + uuid_clear(pools[i].mgpi_uuid); + if (pools[i].mgpi_svc) { + d_rank_list_free(pools[i].mgpi_svc); + pools[i].mgpi_svc = NULL; + } + } + } +} + void test_set_engine_fail_loc(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_loc); void test_set_engine_fail_value(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_value); void test_set_engine_fail_num(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_num); diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 26d00050b00..fe6c3c6d500 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -641,6 +641,7 @@ test_teardown(void **state) d_rank_list_free(arg->pool.svc); if (arg->pool.alive_svc) d_rank_list_free(arg->pool.alive_svc); + D_FREE(arg->pool.label); D_FREE(arg); *state = NULL; return 0; diff --git a/src/vea/vea_api.c b/src/vea/vea_api.c index 7cb3c0d76ff..44cc2198460 100644 --- a/src/vea/vea_api.c +++ b/src/vea/vea_api.c @@ -966,3 +966,30 @@ vea_flush(struct vea_space_info *vsi, uint32_t nr_flush, 
uint32_t *nr_flushed) return trigger_aging_flush(vsi, false, nr_flush, nr_flushed); } + +struct vea_cb_args { + vea_free_callback_t vca_cb; + void *vca_cb_args; +}; + +static int +vea_free_extent_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *cb_arg) +{ + struct vea_cb_args *args = cb_arg; + struct vea_free_extent *vfe; + + vfe = (struct vea_free_extent *)val->iov_buf; + + if (args->vca_cb) + return args->vca_cb(args->vca_cb_args, vfe); + + return 0; +} + +int +vea_enumerate_free(struct vea_space_info *vsi, vea_free_callback_t cb, void *cb_arg) +{ + struct vea_cb_args args = { .vca_cb = cb, .vca_cb_args = cb_arg }; + + return dbtree_iterate(vsi->vsi_md_free_btr, 0, false, vea_free_extent_cb, &args); +} diff --git a/src/vea/vea_internal.h b/src/vea/vea_internal.h index 09e838a33c4..176f966caca 100644 --- a/src/vea/vea_internal.h +++ b/src/vea/vea_internal.h @@ -18,13 +18,6 @@ #define VEA_BLK_SZ (4 * 1024) /* 4K */ #define VEA_TREE_ODR 20 -/* Common free extent structure for both SCM & in-memory index */ -struct vea_free_extent { - uint64_t vfe_blk_off; /* Block offset of the extent */ - uint32_t vfe_blk_cnt; /* Total blocks of the extent */ - uint32_t vfe_age; /* Monotonic timestamp */ -}; - /* Min bitmap allocation class */ #define VEA_MIN_BITMAP_CLASS 1 /* Max bitmap allocation class */ diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index de246dab654..b2322c9df2c 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -981,7 +981,7 @@ vos_self_fini(void) #define LMMDB_PATH "/var/daos/" int -vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) +vos_self_init_ext(const char *db_path, bool use_sys_db, int tgt_id, bool nvme_init) { char *evt_mode; int rc = 0; @@ -1006,9 +1006,11 @@ vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) goto out; } #endif - rc = vos_self_nvme_init(db_path); - if (rc) - goto failed; + if (nvme_init) { + rc = vos_self_nvme_init(db_path); + if (rc) + goto failed; + } rc = vos_mod_init(); if 
(rc) @@ -1064,3 +1066,9 @@ vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) D_MUTEX_UNLOCK(&self_mode.self_lock); return rc; } + +int +vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) +{ + return vos_self_init_ext(db_path, use_sys_db, tgt_id, true); +} diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index dd2ce0c6f3a..611d9adfe19 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1447,7 +1447,8 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m vos_pool_decref(pool); return -DER_BUSY; } - if ((flags & VOS_POF_EXCL) || pool->vp_excl) { + if (!(flags & VOS_POF_FOR_CHECK_QUERY) && + ((flags & VOS_POF_EXCL) || pool->vp_excl)) { vos_pool_decref(pool); return -DER_BUSY; } @@ -1616,6 +1617,13 @@ vos_pool_query(daos_handle_t poh, vos_pool_info_t *pinfo) pinfo->pif_cont_nr = pool_df->pd_cont_nr; pinfo->pif_gc_stat = pool->vp_gc_stat_global; + /* + * NOTE: The chk_pool_info::cpi_statistics contains the inconsistency statistics during + * phase range [CSP_DTX_RESYNC, CSP_AGGREGATION] for the pool shard on the target. + * Related information will be filled in subsequent CR project milestone. + */ + memset(&pinfo->pif_chk, 0, sizeof(pinfo->pif_chk)); + rc = vos_space_query(pool, &pinfo->pif_space, true); if (rc) D_ERROR("Query pool "DF_UUID" failed. 
"DF_RC"\n", diff --git a/utils/completion/daos.bash b/utils/completion/daos.bash index ad62305aa8f..1d3f498e381 100644 --- a/utils/completion/daos.bash +++ b/utils/completion/daos.bash @@ -25,5 +25,6 @@ _daos_control_comp() # these commands take advantage of automatic completion complete -F _daos_control_comp dmg -o nospace complete -F _daos_control_comp daos -o nospace +complete -F _daos_control_comp ddb -o nospace complete -F _daos_control_comp daos_agent complete -F _daos_control_comp daos_server diff --git a/utils/cq/words.dict b/utils/cq/words.dict index 3d1fc6f9376..c5ca4dd5d41 100644 --- a/utils/cq/words.dict +++ b/utils/cq/words.dict @@ -63,6 +63,7 @@ Uncomment VMD XUnitResult acl +adminexcluded akey akeys allocator @@ -100,6 +101,7 @@ centric chdir checksum chgrp +chk chmod chown chowned @@ -129,6 +131,7 @@ datamover dataset dbench dcp +ddb ddict dealloc debian @@ -171,6 +174,7 @@ epcrange epilog errored ethernet +fallocate fchmod fcntl filename @@ -224,6 +228,7 @@ iod iodepth ioengine ior +ioreqs iotype iov ip @@ -381,6 +386,7 @@ rpaths rpc rpms rsvc +rsync runtime runuser rw @@ -498,6 +504,7 @@ vm vos wildcard wipefs +xargs xattr xattrs xfer diff --git a/utils/cr_demo/demo_utils.py b/utils/cr_demo/demo_utils.py new file mode 100644 index 00000000000..1007ce77e24 --- /dev/null +++ b/utils/cr_demo/demo_utils.py @@ -0,0 +1,379 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import subprocess # nosec + +import yaml + + +# Storage-related methods +def format_storage(host_list): + """Call dmg storage format. + + Args: + host_list (str): List of hosts to format. + """ + format_cmd = ["dmg", "storage", "format", "--host-list=" + host_list] + run_command(command=format_cmd) + + +def storage_query_usage(host_list): + """Call dmg storage query usage. + + Args: + host_list (str): List of hosts to query. 
+ """ + storage_query_cmd = ["dmg", "storage", "query", "usage", "--host-list=" + host_list] + run_command(command=storage_query_cmd) + + +# Pool-related methods +def create_pool(pool_size, pool_label, ranks=None, nsvc=None): + """Call dmg pool create. + + Args: + pool_size (str): Pool size. + pool_label (str): Pool label. + ranks (str): Ranks to create pool. Defaults to None. + nsvc (str): Number of service replicas. Defaults to None. + """ + create_pool_cmd = ["dmg", "pool", "create", pool_label, "--size=" + pool_size] + if ranks: + create_pool_cmd.append("--ranks=" + ranks) + if nsvc: + create_pool_cmd.append("--nsvc=" + nsvc) + run_command(command=create_pool_cmd) + + +def list_pool(verbose=False, json=False, no_query=False): + """Call dmg pool list. + + Args: + verbose (bool): Whether to use --verbose. Defaults to False. + json (bool): Whether to use --json. If used, verbose value would be irrelevant. + Defaults to False. + no_query (bool): Whether to use --no-query. Defaults to False. + + Returns: + str: If --json is used, return stdout. Otherwise None. + + """ + list_pool_cmd = ["dmg", "pool", "list"] + if json: + list_pool_cmd.append("--json") + if verbose: + list_pool_cmd.append("--verbose") + if no_query: + list_pool_cmd.append("--no-query") + command = " ".join(list_pool_cmd) + print(f"Command: {command}") + + if json: + result = subprocess.run( + list_pool_cmd, stdout=subprocess.PIPE, universal_newlines=True, check=False) + return result.stdout + + subprocess.run(list_pool_cmd, check=False) + return None + + +def pool_get_prop(pool_label, properties): + """Call dmg pool get-prop + + Args: + pool_label (str): Pool label. + properties (str): Properties to query. Separate them with comma if there are + multiple properties. + """ + get_prop_cmd = ["dmg", "pool", "get-prop", pool_label, properties] + run_command(command=get_prop_cmd) + + +def pool_query(pool_label): + """Call dmg pool query + + Args: + pool_label (str): Pool label. 
+ """ + pool_query_cmd = ["dmg", "pool", "query", pool_label] + run_command(command=pool_query_cmd) + + +# Container-related methods +def create_container(pool_label, cont_label): + """Call daos container create. + + Args: + pool_label (str): Pool label. + cont_label (str): Container label. + """ + cont_create_cmd = ["daos", "container", "create", pool_label, cont_label] + run_command(command=cont_create_cmd) + + +def cont_get_prop(pool_label, cont_label, properties=None): + """Call daos container get-prop + + Args: + pool_label (str): Pool label. + cont_label (str): Container label. + properties (str): Properties to query. Separate them with comma if there are + multiple properties. Defaults to None. + """ + get_prop_cmd = ["daos", "container", "get-prop", pool_label, cont_label] + if properties: + get_prop_cmd.append("--properties=" + properties) + run_command(command=get_prop_cmd) + + +# Fault-related methods +def inject_fault_mgmt(pool_label, fault_type): + """Call dmg faults mgmt-svc to inject fault. + + Args: + pool_label (str): Pool label. + fault_type (str): Fault type. + """ + inject_fault_cmd = ["dmg", "faults", "mgmt-svc", "pool", pool_label, fault_type] + run_command(command=inject_fault_cmd) + + +def inject_fault_pool(pool_label, fault_type): + """Call dmg faults pool-svc to inject fault. + + Args: + pool_label (str): Pool label. + fault_type (str): Fault type. + """ + inject_fault_cmd = ["dmg", "faults", "pool-svc", pool_label, fault_type] + run_command(command=inject_fault_cmd) + + +def inject_fault_daos(pool_label, cont_label, fault_type): + """Call daos faults to inject fault. + + Args: + pool_label (str): Pool label. + cont_label (str): Container label. + fault_type (str): Fault type. 
+ """ + location = "--location=" + fault_type + inject_fault_cmd = ["daos", "faults", "container", pool_label, cont_label, location] + run_command(command=inject_fault_cmd) + + +# Check-related methods +def check_enable(): + """Call dmg check enable""" + check_enable_cmd = ["dmg", "check", "enable"] + run_command(command=check_enable_cmd) + + +def check_set_policy(reset_defaults=False, all_interactive=False): + """Call dmg check set-policy with --reset-defaults or --all-interactive. + + Args: + reset_defaults (bool): Set all policies to their default action. Defaults to + False. + all_interactive (bool): Set all policies to interactive. Defaults to False. + """ + if reset_defaults != all_interactive: + check_set_policy_cmd = ["dmg", "check", "set-policy"] + if reset_defaults: + check_set_policy_cmd.append("--reset-defaults") + if all_interactive: + check_set_policy_cmd.append("--all-interactive") + run_command(command=check_set_policy_cmd) + + +def check_start(policies=None): + """Call dmg check start + + Args: + policies (str): Repair policies such as POOL_BAD_LABEL:CIA_INTERACT + """ + check_start_cmd = ["dmg", "check", "start"] + if policies: + check_start_cmd.extend(["-p", policies]) + run_command(command=check_start_cmd) + + +def check_query(json=False): + """Call dmg check query + + Args: + json (bool): Whether to use --json. Defaults to False. + + Returns: + str: If --json is used, return stdout. Otherwise None. 
+ + """ + if json: + check_query_cmd = ["dmg", "--json", "check", "query"] + else: + check_query_cmd = ["dmg", "check", "query"] + command = " ".join(check_query_cmd) + print(f"Command: {command}") + + if json: + result = subprocess.run( + check_query_cmd, stdout=subprocess.PIPE, universal_newlines=True, check=False) + return result.stdout + + subprocess.run(check_query_cmd, check=False) + return None + + +def check_disable(): + """Call dmg check disable""" + check_disable_cmd = ["dmg", "check", "disable"] + run_command(command=check_disable_cmd) + + +def repeat_check_query(): + """Allow user to repeatedly call dmg check query.""" + while True: + user_input = input("Hit y to query, n to proceed to next step: ") + if user_input == "y": + check_query() + elif user_input == "n": + break + else: + print("Please enter y or n.") + + +def check_repair(sequence_num, action): + """Call dmg check repair + + Args: + sequence_num (str): Sequence number for repair action. + action (str): Repair action number. + """ + check_repair_cmd = ["dmg", "check", "repair", sequence_num, action] + run_command(command=check_repair_cmd) + + +# System-related methods +def system_stop(force=False): + """Stop servers. + + Args: + force (bool): Whether to use --force. Defaults to None. + """ + system_stop_cmd = ["dmg", "system", "stop"] + if force: + system_stop_cmd.append("--force") + run_command(command=system_stop_cmd) + + +def system_start(): + """Start servers.""" + system_start_cmd = ["dmg", "system", "start"] + run_command(command=system_start_cmd) + + +def system_query(json=False, verbose=False): + """Call dmg system query + + Args: + json (bool): Whether to use --json. Defaults to False. + verbose (bool): Whether to use --verbose. Defaults to False. + + Returns: + str: Command output. 
+ + """ + if json: + system_query_cmd = ["dmg", "--json", "system", "query"] + else: + system_query_cmd = ["dmg", "system", "query"] + if verbose: + system_query_cmd.append("--verbose") + command = " ".join(system_query_cmd) + print(f"Command: {command}") + + if json: + result = subprocess.run( + system_query_cmd, stdout=subprocess.PIPE, universal_newlines=True, + check=False) + return result.stdout + + subprocess.run(system_query_cmd, check=False) + return None + + +# Utility methods +def create_uuid_to_seqnum(): + """Create pool UUID to sequence number mapping. + + Returns: + dict: UUID to sequence number mapping for each pool. Sequence number will be used + during repair. + + """ + uuid_to_seqnum = {} + stdout = check_query(json=True) + generated_yaml = yaml.safe_load(stdout) + for report in generated_yaml["response"]["reports"]: + uuid_to_seqnum[report["pool_uuid"]] = report["seq"] + + return uuid_to_seqnum + + +def create_label_to_uuid(): + """Create label to UUID mapping. + + Returns: + dict: Pool label to UUID. + + """ + label_to_uuid = {} + stdout = list_pool(json=True) + generated_yaml = yaml.safe_load(stdout) + for pool in generated_yaml["response"]["pools"]: + label_to_uuid[pool["label"]] = pool["uuid"] + + return label_to_uuid + + +def get_current_labels(): + """Get current pool labels from MS. + + Returns: + list: Current pool labels. + + """ + pool_labels = [] + stdout = list_pool(json=True) + generated_yaml = yaml.safe_load(stdout) + for pool in generated_yaml["response"]["pools"]: + pool_labels.append(pool["label"]) + + return pool_labels + + +def convert_list_to_str(original_list, separator): + """Convert given list to a string with each item separated by separator. + + Args: + original_list (list): List of items. + separator (str): Separator to separate each item in the new string list. + + Returns: + str: String list. + + """ + return separator.join(map(str, original_list)) + + +def run_command(command): + """Print given command and run. 
+ + Args: + command (list): List of strings (program name and arguments) that make up the command. + """ + cmd_str = " ".join(command) + print(f"Command: {cmd_str}") + subprocess.run(command, check=False) diff --git a/utils/cr_demo/run_demo_aurora.py b/utils/cr_demo/run_demo_aurora.py new file mode 100644 index 00000000000..c7f1962c2be --- /dev/null +++ b/utils/cr_demo/run_demo_aurora.py @@ -0,0 +1,434 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import argparse +import re +import subprocess # nosec +import time +from collections import defaultdict + +import yaml +from ClusterShell.NodeSet import NodeSet +from demo_utils import (check_disable, check_enable, check_repair, check_set_policy, check_start, + cont_get_prop, convert_list_to_str, create_container, create_pool, + create_uuid_to_seqnum, format_storage, inject_fault_daos, + inject_fault_mgmt, inject_fault_pool, list_pool, pool_get_prop, + repeat_check_query, storage_query_usage, system_query, system_start, + system_stop) + +# Run this script on Aurora node as user. e.g., +# python3 run_demo_aurora.py -l aurora-daos-[0001-0100] + +TEST_CMD = "sudo date" +test_cmd_list = TEST_CMD.split(" ") +print(f"Check sudo works by calling: {TEST_CMD}") +subprocess.run(test_cmd_list, check=False) + +POOL_SIZE = "5T" +POOL_SIZE_F5 = "3T" +POOL_LABEL = "tank" +CONT_LABEL = "bucket" +# Number of seconds to wait for engines to start for 1 group setup.
+FORMAT_SLEEP_SEC = 35 + +print("\nF1: Dangling pool") +print("F2: Lost the majority of pool service replicas") +print("F3: Orphan pool") +print("F4: Inconsistent pool label between MS and PS") +print("F5: Orphan pool shard") +print("F6: Dangling pool map") +print("F7: Orphan container") +print("F8: Inconsistent container label between CS and container property") + +PARSER = argparse.ArgumentParser() +PARSER.add_argument( + "-l", "--hostlist", required=True, help="List of hosts to run the demo") +ARGS = vars(PARSER.parse_args()) + +HOSTLIST = ARGS["hostlist"] + +print(f"\n1. Format storage on {HOSTLIST}.") +format_storage(host_list=HOSTLIST) + +print(f"\nWait for {FORMAT_SLEEP_SEC} sec for format...") +time.sleep(FORMAT_SLEEP_SEC) + +# Call dmg system query to obtain the IP address of necessary ranks. +rank_to_ip = {} +stdout = system_query(json=True) +# Printing system query output helps, but the output will be long if there are many ranks. +# print(f"dmg system query stdout = {stdout}") +generated_yaml = yaml.safe_load(stdout) +RANK_COUNT = 0 +JOINED_COUNT = 0 +for member in generated_yaml["response"]["members"]: + rank_to_ip[member["rank"]] = member["addr"].split(":")[0] + RANK_COUNT += 1 + if member["state"] == "joined": + JOINED_COUNT += 1 +# Print the number of ranks and joined ranks as a reference. +node_set = NodeSet(HOSTLIST) +hostlist = list(node_set) +print(f"\n{len(hostlist)} nodes; {RANK_COUNT} ranks; {JOINED_COUNT} joined") + +# Create rank to mount point map and host to ranks map for F2 and F5. +# 1. scp daos_control.log from all nodes to here, where this script runs. scp the local +# file as well. Add hostname to the end of the file name. The log contains rank and PID. +# Number of nodes used for F2. +NODE_COUNT = 2 +for i in range(NODE_COUNT): + scp_cmd_list = ["scp", f"{hostlist[i]}:/var/tmp/daos_testing/daos_control.log", + f"/var/tmp/daos_testing/daos_control_{hostlist[i]}.log"] + subprocess.run(scp_cmd_list, check=False) + +# 2. 
Determine the rank to PID mapping from the control logs. In addition, determine the +# host to ranks mapping for creating the pool. We need to know the four ranks for the +# first two nodes. We'll use many nodes in Aurora, but only two nodes for F2. +rank_to_pid = {} +host_to_ranks = defaultdict(list) +SEARCH_STR = r"DAOS I/O Engine.*process (\d+) started on rank (\d+)" +for i in range(NODE_COUNT): + with open( + f"/var/tmp/daos_testing/daos_control_{hostlist[i]}.log", "r", + encoding="utf-8") as file: + for line in file: + match = re.findall(SEARCH_STR, line) + if match: + print(match) + pid = int(match[0][0]) + rank = int(match[0][1]) + rank_to_pid[rank] = pid + host_to_ranks[hostlist[i]].append(rank) + +# 3. Determine the PID to mount point mapping by calling ps ax and search for daos_engine. +# Sample line: +# 84877 ? SLl 102:04 /usr/bin/daos_engine -t 8 -x 1 -g daos_server -d +# /var/run/daos_server -T 2 -n /mnt/daos1/daos_nvme.conf -p 1 -I 1 -r 8192 -H 2 -s +# /mnt/daos1 +pid_to_mount = {} +MOUNT_0 = "/mnt/daos0" +MOUNT_1 = "/mnt/daos1" +for i in range(NODE_COUNT): + clush_ps_ax = ["clush", "-w", hostlist[i], "ps ax"] + result = subprocess.check_output(clush_ps_ax) + result_list = result.decode("utf-8").split("\n") + for result in result_list: + if "daos_engine" in result: + print(result) + if MOUNT_0 in result: + pid = re.split(r"\s+", result)[1] + pid = int(pid) + pid_to_mount[pid] = MOUNT_0 + elif MOUNT_1 in result: + pid = re.split(r"\s+", result)[1] + pid = int(pid) + pid_to_mount[pid] = MOUNT_1 + +# 4. Determine the four ranks in hostlist[0] and hostlist[1] to create F2 pool. +f2_ranks = [] +f2_ranks.extend(host_to_ranks[hostlist[0]]) +f2_ranks.extend(host_to_ranks[hostlist[1]]) +# Ranks in the map are int, so convert them to string and separate them with comma. +F2_RANKS_STR = convert_list_to_str(original_list=f2_ranks, separator=",") + +# 5. Determine the two ranks in hostlist[0] to create F5 pool. 
+f5_ranks = [] +f5_ranks.extend(host_to_ranks[hostlist[0]]) +# Ranks in the map are int, so convert them to string and separate them with comma. +F5_RANKS_STR = convert_list_to_str(original_list=f5_ranks, separator=",") + +# Add input here to make sure all ranks are joined before starting the script. +input("\n2. Create 8 pools and containers. Hit enter...") +POOL_LABEL_1 = POOL_LABEL + "_F1" +POOL_LABEL_2 = POOL_LABEL + "_F2" +POOL_LABEL_3 = POOL_LABEL + "_F3" +POOL_LABEL_4 = POOL_LABEL + "_F4" +POOL_LABEL_5 = POOL_LABEL + "_F5" +POOL_LABEL_6 = POOL_LABEL + "_F6" +POOL_LABEL_7 = POOL_LABEL + "_F7" +POOL_LABEL_8 = POOL_LABEL + "_F8" +CONT_LABEL_7 = CONT_LABEL + "_F7" +CONT_LABEL_8 = CONT_LABEL + "_F8" + +# F1. CIC_POOL_NONEXIST_ON_ENGINE - dangling pool +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_1) +# F2. CIC_POOL_LESS_SVC_WITHOUT_QUORUM +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_2, ranks=F2_RANKS_STR, nsvc="3") +# F3. CIC_POOL_NONEXIST_ON_MS - orphan pool +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_3) +# F4. CIC_POOL_BAD_LABEL - inconsistent pool label between MS and PS +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_4) +# F5. CIC_ENGINE_NONEXIST_IN_MAP - orphan pool shard +create_pool(pool_size=POOL_SIZE_F5, pool_label=POOL_LABEL_5, ranks=F5_RANKS_STR) +# F6. CIC_ENGINE_HAS_NO_STORAGE - dangling pool map +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_6) +# F7. CIC_CONT_NONEXIST_ON_PS - orphan container +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_7) +create_container(pool_label=POOL_LABEL_7, cont_label=CONT_LABEL_7) +print() +# F8. 
CIC_CONT_BAD_LABEL +create_pool(pool_size=POOL_SIZE, pool_label=POOL_LABEL_8) +create_container(pool_label=POOL_LABEL_8, cont_label=CONT_LABEL_8) + +print("(Create label to UUID mapping and obtain service replicas for F2.)") +label_to_uuid = {} +f2_service_replicas = [] +stdout = list_pool(json=True) +generated_yaml = yaml.safe_load(stdout) +for pool in generated_yaml["response"]["pools"]: + label_to_uuid[pool["label"]] = pool["uuid"] + # Collect service replicas for F2. + if pool["label"] == POOL_LABEL_2: + f2_service_replicas = pool["svc_reps"] + +print(f"\n(F2 service replicas = {f2_service_replicas})") + +print(f"\n3-F5. Print storage usage to show original usage of {POOL_LABEL_5}. " + f"Pool is created on {hostlist[0]}.") +# F5 pool is created on hostlist[0] ranks, but we'll copy the pool dir from there to one +# of the ranks in hostlist[1], so show both. +f5_host_list = f"{hostlist[0]},{hostlist[1]}" +storage_query_usage(host_list=f5_host_list) + +print("\n4. Inject fault with dmg for F1, F3, F4, F7, F8.") +# F1 +inject_fault_pool(pool_label=POOL_LABEL_1, fault_type="CIC_POOL_NONEXIST_ON_ENGINE") + +# F3 +inject_fault_mgmt(pool_label=POOL_LABEL_3, fault_type="CIC_POOL_NONEXIST_ON_MS") + +# F4 +inject_fault_mgmt(pool_label=POOL_LABEL_4, fault_type="CIC_POOL_BAD_LABEL") + +# F7 +inject_fault_daos( + pool_label=POOL_LABEL_7, cont_label=CONT_LABEL_7, fault_type="DAOS_CHK_CONT_ORPHAN") + +# F8 +inject_fault_daos( + pool_label=POOL_LABEL_8, cont_label=CONT_LABEL_8, + fault_type="DAOS_CHK_CONT_BAD_LABEL") + +input("\n5-1. Stop servers to manipulate for F2, F5, F6, F7. Hit enter...") +system_stop(force=True) + +# F2: Destroy tank_2 rdb-pool on two of the three service replicas. Call them rank a and +# b. Select the first two service replicas. 
+svc_rep_a = f2_service_replicas[0] +svc_rep_b = f2_service_replicas[1] +rank_a_ip = rank_to_ip[svc_rep_a] +rank_b_ip = rank_to_ip[svc_rep_b] +rank_a_mount = pid_to_mount[rank_to_pid[svc_rep_a]] +rank_b_mount = pid_to_mount[rank_to_pid[svc_rep_b]] +rm_rank_a = f"sudo rm {rank_a_mount}/{label_to_uuid[POOL_LABEL_2]}/rdb-pool" +rm_rank_b = f"sudo rm {rank_b_mount}/{label_to_uuid[POOL_LABEL_2]}/rdb-pool" +clush_rm_rank_a = ["clush", "-w", rank_a_ip, rm_rank_a] +clush_rm_rank_b = ["clush", "-w", rank_b_ip, rm_rank_b] +print("(F2: Destroy tank_F2 rdb-pool on rank a and b.)") +print(f"Command for rank a: {clush_rm_rank_a}\n") +print(f"Command for rank b: {clush_rm_rank_b}\n") +subprocess.run(clush_rm_rank_a, check=False) +subprocess.run(clush_rm_rank_b, check=False) + +# F5: Copy tank_F5 pool directory from /mnt/daos1 in hostlist[0] to /mnt/daos0 in +# hostlist[1]. Match owner. (Mount points are arbitrary.) +# In order to copy the pool directory without password, there are two things to set up. +# 1. Since we're running rsync as user, update the mode of the source pool directory as +# below. +# Set 777 for /mnt/daos1 and /mnt/daos1/{pool_uuid}/* i.e., +# chmod 777 /mnt/daos1; chmod -R 777 /mnt/daos1/{pool_uuid} +# 2. Update mode of the destination mount point to 777. e.g., +# clush -w hostlist[1] "sudo chmod 777 /mnt/daos0" + +# Alternatively, we can generate a public-private key pair for root and call scp with sudo. +# Then we don't need to do step 2 (update mode to 777).
+ +print("(F5: Update mode of the source pool directory.)") +pool_uuid_5 = label_to_uuid[POOL_LABEL_5] +chmod_cmd = f"sudo chmod 777 /mnt/daos1; sudo chmod -R 777 /mnt/daos1/{pool_uuid_5}" +clush_chmod_cmd = ["clush", "-w", hostlist[0], chmod_cmd] +print(f"Command: {clush_chmod_cmd}\n") +subprocess.run(clush_chmod_cmd, check=False) + +print("(F5: Update mode of the destination mount point.)") +CHMOD_CMD = "sudo chmod 777 /mnt/daos0" +clush_chmod_cmd = ["clush", "-w", hostlist[1], CHMOD_CMD] +print(f"Command: {clush_chmod_cmd}\n") +subprocess.run(clush_chmod_cmd, check=False) + +# Since we're sending each file (vos-0 to 15 + rdb-pool) one at a time rather than the +# whole pool directory, we need to create the destination fake pool directory first. +print("(F5: Create a fake pool directory at the destination mount point.)") +mkdir_cmd = f"sudo mkdir /mnt/daos0/{pool_uuid_5}" +clush_mkdir_cmd = ["clush", "-w", hostlist[1], mkdir_cmd] +print(f"Command: {clush_mkdir_cmd}\n") +subprocess.run(clush_mkdir_cmd, check=False) + +print("(F5: Update mode of the fake pool directory at destination.)") +chmod_cmd = f"sudo chmod 777 /mnt/daos0/{pool_uuid_5}" +clush_chmod_cmd = ["clush", "-w", hostlist[1], chmod_cmd] +print(f"Command: {clush_chmod_cmd}\n") +subprocess.run(clush_chmod_cmd, check=False) + +# Run the following xargs + rsync command on hostlist[0] using clush: +# ls /mnt/daos1/{pool_uuid} | xargs --max-procs=16 -I% \ +# rsync -avz /mnt/daos1/{pool_uuid}/% hostlist[1]:/mnt/daos0/{pool_uuid} + +# 1. The initial ls command lists the content of the pool directory, which contains 16 vos +# files (because there are 16 targets) and the rdb-pool file. +# 2. By using xargs, each item of the ls output is passed into rsync and the rsync +# commands are executed in parallel. i.e., each file is sent by a separate rsync process in +# parallel. + +# * We use --max-procs=16 to support at most 16 rsync processes to run in parallel. +# * -I% means replace % in the following rsync command by the output of ls.
i.e., file +# name. +# * rsync -avz means archive, verbose, and compress. By using compress, we can +# significantly reduce the size of the data and the transfer time. +# * By running rsync in parallel, we can significantly reduce the transfer time. e.g., For +# a 2TB pool with 8 targets per engine, each vos file size is about 7G (rdb-pool is +# smaller). If we run a simple rsync, which runs serially, it takes 1 min 50 sec. +# However, if we run them in parallel, it's reduced to 24 sec. +print(f"(F5: Copy pool directory from {hostlist[0]} to {hostlist[1]}.)") +xargs_rsync_cmd = (f"ls /mnt/daos1/{pool_uuid_5} | xargs --max-procs=16 -I% " + f"rsync -avz /mnt/daos1/{pool_uuid_5}/% " + f"{hostlist[1]}:/mnt/daos0/{pool_uuid_5}") +clush_xargs_rsync_cmd = ["clush", "-w", hostlist[0], xargs_rsync_cmd] +print(f"Command: {clush_xargs_rsync_cmd}\n") +subprocess.run(clush_xargs_rsync_cmd, check=False) + +print("(F5: Set owner for the copied dir and files to daos_server:daos_server.)") +chown_cmd = f"sudo chown -R daos_server:daos_server /mnt/daos0/{pool_uuid_5}" +clush_chown_cmd = ["clush", "-w", hostlist[1], chown_cmd] +print(f"Command: {clush_chown_cmd}\n") +subprocess.run(clush_chown_cmd, check=False) + +print("(F6: Remove vos-0 from one of the nodes.)") +pool_uuid_6 = label_to_uuid[POOL_LABEL_6] +rm_cmd = f"sudo rm -rf /mnt/daos0/{pool_uuid_6}/vos-0" +# Remove vos-0 from /mnt/daos0 in rank 0 node. Note that /mnt/daos0 may not be mapped to +# rank 0. Rank 0 is mapped to either daos0 or daos1. However, we don't care for the +# purpose of testing dangling pool map. +clush_rm_cmd = ["clush", "-w", rank_to_ip[0], rm_cmd] +print(f"Command: {clush_rm_cmd}\n") +subprocess.run(clush_rm_cmd, check=False) + +print("F7: Use ddb to show that the container is left in shards.") +pool_uuid_7 = label_to_uuid[POOL_LABEL_7] +# Run ddb on /mnt/daos0 of rank 0 node. +ddb_cmd = f"sudo ddb /mnt/daos0/{pool_uuid_7}/vos-0 ls" +# ddb with clush causes some authentication error. 
tank_F7 is created across all ranks, so +# just run ddb locally as a workaround. +ddb_cmd_list = ddb_cmd.split(" ") +print(f"Command: {ddb_cmd}") +subprocess.run(ddb_cmd_list, check=False) + +# (optional) F3: Show pool directory at mount point to verify that the pool exists on +# engine. + +print("\n5-2. Restart servers.") +system_start() + +input("\n6. Show the faults injected for each pool/container for F1, F3, F4, F5, F8. " + "Hit enter...") +print(f"6-F1. Show dangling pool entry for {POOL_LABEL_1}.") +# F3 part 1 +print(f"6-F3. MS doesn't recognize {POOL_LABEL_3}.") +# F4 part 1 +print(f"6-F4-1. Label ({POOL_LABEL_4}) in MS is corrupted with -fault added.") +list_pool(no_query=True) + +# F2: (optional) Try to create a container, which will hang. + +# F4 part 2 +print(f"\n6-F4-2. Label ({POOL_LABEL_4}) in PS is still original.") +POOL_LABEL_4_FAULT = POOL_LABEL_4 + "-fault" +pool_get_prop(pool_label=POOL_LABEL_4_FAULT, properties="label") + +# F5: Call dmg storage query usage to show that the pool is using more space. +print(f"\n6-F5. Print storage usage to show that {POOL_LABEL_5} is using more space. " + f"Pool directory is copied to {hostlist[1]}.") +storage_query_usage(host_list=f5_host_list) + +# F8: Show inconsistency by getting the container label. +print("\n6-F8. Show container label inconsistency.") +cont_get_prop(pool_label=POOL_LABEL_8, cont_label=CONT_LABEL_8) +print(f"Error because container ({CONT_LABEL_8}) doesn't exist on container service.\n") + +print(f"Container ({CONT_LABEL_8}) exists on property.") +cont_get_prop(pool_label=POOL_LABEL_8, cont_label="new-label", properties="label") + +input("\n7. Enable checker. Hit enter...") +system_stop(force=True) +check_enable() + +input("\n8. Start checker with interactive mode. Hit enter...") +check_set_policy(all_interactive=True) +print() +check_start() +print() +repeat_check_query() + +input("\n8-1. Select repair options for F1 to F4. 
Hit enter...") +print("(Create UUID to sequence number.)") +uuid_to_seqnum = create_uuid_to_seqnum() +SEQ_NUM_1 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_1]])) +SEQ_NUM_2 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_2]])) +SEQ_NUM_3 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_3]])) +SEQ_NUM_4 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_4]])) +SEQ_NUM_5 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_5]])) +SEQ_NUM_6 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_6]])) +SEQ_NUM_7 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_7]])) +SEQ_NUM_8 = str(hex(uuid_to_seqnum[label_to_uuid[POOL_LABEL_8]])) + +# F1: 1: Discard the dangling pool entry from MS [suggested]. +print(f"\n{POOL_LABEL_1} - 1: Discard the dangling pool entry from MS [suggested].") +check_repair(sequence_num=SEQ_NUM_1, action="1") + +# F2: 2: Start pool service under DICTATE mode from rank 1 [suggested]. +print(f"\n{POOL_LABEL_2} - 2: Start pool service under DICTATE mode from rank 1 " + f"[suggested].") +check_repair(sequence_num=SEQ_NUM_2, action="2") + +# F3: 2: Re-add the orphan pool back to MS [suggested]. +print(f"\n{POOL_LABEL_3} - 2: Re-add the orphan pool back to MS [suggested].") +check_repair(sequence_num=SEQ_NUM_3, action="2") + +# F4: 2: Trust PS pool label. +print(f"\n{POOL_LABEL_4} - 2: Trust PS pool label.") +check_repair(sequence_num=SEQ_NUM_4, action="2") + +print() +# Call dmg check query until n is entered. +repeat_check_query() + +input("\n8-2. Select repair options for F5 to F8. Hit enter...") +# F5: 1: Discard the orphan pool shard to release space [suggested]. +print(f"\n{POOL_LABEL_5} - 1: Discard the orphan pool shard to release space " + f"[suggested].") +check_repair(sequence_num=SEQ_NUM_5, action="1") + +# F6: 1: Change pool map for the dangling map entry as down [suggested].
+print(f"\n{POOL_LABEL_6} - 1: Change pool map for the dangling map entry as down " + f"[suggested].") +check_repair(sequence_num=SEQ_NUM_6, action="1") + +# F7: 1: Destroy the orphan container to release space [suggested]. +print(f"\n{POOL_LABEL_7} - 1: Destroy the orphan container to release space [suggested].") +check_repair(sequence_num=SEQ_NUM_7, action="1") + +# F8: 2: Trust the container label in container property. +print(f"\n{POOL_LABEL_8} - 2: Trust the container label in container property.") +check_repair(sequence_num=SEQ_NUM_8, action="2") + +print() +# Call dmg check query until n is entered. +repeat_check_query() + +print("\n9. Disable the checker.") +check_disable() +system_start() + +print("\nRun show_fixed_aurora.py to show the issues fixed...") diff --git a/utils/cr_demo/show_fixed_aurora.py b/utils/cr_demo/show_fixed_aurora.py new file mode 100644 index 00000000000..6271023ac13 --- /dev/null +++ b/utils/cr_demo/show_fixed_aurora.py @@ -0,0 +1,117 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import argparse +import subprocess # nosec + +import yaml +from ClusterShell.NodeSet import NodeSet +from demo_utils import (cont_get_prop, create_container, list_pool, pool_get_prop, pool_query, + storage_query_usage, system_query, system_stop) + +# Run this script on Aurora node as user after running run_demo_aurora.py. 
E.g., +# python3 show_fixed_aurora.py -l aurora-daos-[0001-0100] + +TEST_CMD = "sudo date" +test_cmd_list = TEST_CMD.split(" ") +print(f"Check sudo works by calling: {TEST_CMD}") +subprocess.run(test_cmd_list, check=False) + +POOL_LABEL = "tank" +CONT_LABEL = "bucket" +TARGET_PER_RANK = 16 + +PARSER = argparse.ArgumentParser() +PARSER.add_argument( + "-l", "--hostlist", required=True, help="List of hosts used for run_demo.py") +ARGS = vars(PARSER.parse_args()) +HOSTLIST = ARGS["hostlist"] +node_set = NodeSet(HOSTLIST) +hostlist = list(node_set) + +# Call dmg system query to obtain the IP address of necessary ranks. +rank_to_ip = {} +stdout = system_query(json=True) +# Printing system query output helps, but the output will be long if there are many ranks. +# print(f"dmg system query stdout = {stdout}") +generated_yaml = yaml.safe_load(stdout) +RANK_COUNT = 0 +JOINED_COUNT = 0 +for member in generated_yaml["response"]["members"]: + rank_to_ip[member["rank"]] = member["addr"].split(":")[0] + RANK_COUNT += 1 + if member["state"] == "joined": + JOINED_COUNT += 1 +# Print the number of ranks and joined ranks as a reference. +print(f"\n{RANK_COUNT} ranks; {JOINED_COUNT} joined") +TOTAL_TARGET = RANK_COUNT * TARGET_PER_RANK + +POOL_LABEL_1 = POOL_LABEL + "_F1" +POOL_LABEL_2 = POOL_LABEL + "_F2" +POOL_LABEL_3 = POOL_LABEL + "_F3" +POOL_LABEL_4 = POOL_LABEL + "_F4" +POOL_LABEL_5 = POOL_LABEL + "_F5" +POOL_LABEL_6 = POOL_LABEL + "_F6" +POOL_LABEL_7 = POOL_LABEL + "_F7" +POOL_LABEL_8 = POOL_LABEL + "_F8" +CONT_LABEL_8 = CONT_LABEL + "_F8" + +print("(Create label to UUID mapping.)") +label_to_uuid = {} +stdout = list_pool(json=True) +generated_yaml = yaml.safe_load(stdout) +for pool in generated_yaml["response"]["pools"]: + label_to_uuid[pool["label"]] = pool["uuid"] + +input("\n10. Show the issues fixed. Hit enter...") +print(f"10-F1. Dangling pool ({POOL_LABEL_1}) was removed.") +print(f"10-F3. Orphan pool ({POOL_LABEL_3}) was reconstructed.") +list_pool() + +print(f"10-F2. 
Create a container on {POOL_LABEL_2}. Pool can be started now, so it " + f"should succeed.") +CONT_LABEL_2 = CONT_LABEL + "_2" +create_container(pool_label=POOL_LABEL_2, cont_label=CONT_LABEL_2) +# (optional) Show that rdb-pool file in rank 0 and 2 are recovered. + +print(f"\n10-F4. Label inconsistency for {POOL_LABEL_4} was resolved. " + f"See pool list above.") +pool_get_prop(pool_label=POOL_LABEL_4, properties="label") + +# F5: Call dmg storage query usage to verify the storage was reclaimed. - Not working due +# to a bug. Instead, show that pool directory on dst node (rank 3 for 4-VM) was removed. +print(f"\n10-F5-1. Print storage usage to show that storage used by {POOL_LABEL_5} is " + f"reclaimed after pool directory is removed from {hostlist[1]}.") +f5_host_list = f"{hostlist[0]},{hostlist[1]}" +storage_query_usage(host_list=f5_host_list) + +print(f"\n10-F5-2. {label_to_uuid[POOL_LABEL_5]} pool directory on {hostlist[1]} " + f"at /mnt/daos0 was removed.") +LS_CMD = "ls /mnt/daos0" +clush_ls_cmd = ["clush", "-w", hostlist[1], LS_CMD] +print(f"Command: {clush_ls_cmd}\n") +subprocess.run(clush_ls_cmd, check=False) + +EXPECTED_TARGET = TOTAL_TARGET - 1 +print( + f"\n10-F6. {POOL_LABEL_6} has one less target ({TOTAL_TARGET} -> {EXPECTED_TARGET}).") +pool_query(pool_label=POOL_LABEL_6) +# (optional) Reintegrate rank 1 on pool 6. Wait for rebuild to finish. Then verify the +# target count. + +# F8: Verify that the inconsistency is fixed. The label is back to the original. +print(f"\n10-F8. Container label inconsistency for {CONT_LABEL_8} was fixed.") +cont_get_prop(pool_label=POOL_LABEL_8, cont_label=CONT_LABEL_8, properties="label") + +# F7: Stop server. Call the same ddb command to verify that the container is removed from +# shard. +print(f"\n10-F7. 
Use ddb to verify that the container in {POOL_LABEL_7} is removed " + f"from shards.") +system_stop(force=True) +pool_uuid_7 = label_to_uuid[POOL_LABEL_7] +ddb_cmd = f"sudo ddb /mnt/daos0/{pool_uuid_7}/vos-0 ls" +ddb_cmd_list = ddb_cmd.split(" ") +print(f"Command: {ddb_cmd}") +subprocess.run(ddb_cmd_list, check=False) diff --git a/utils/rpms/daos.rpmlintrc b/utils/rpms/daos.rpmlintrc index 46be7b84b9b..09022a74e59 100644 --- a/utils/rpms/daos.rpmlintrc +++ b/utils/rpms/daos.rpmlintrc @@ -20,7 +20,7 @@ addFilter("daos-client\.x86_64: E: post(i|u)n-without-ldconfig \/usr\/lib64\/lib addFilter("daos-(client|server)\.x86_64: W: dangerous-command-in-%post(un)? rm") # lots of missing manpages -addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests)") +addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests|ddb|ddb_tests)") addFilter("daos-(server|firmware)\.x86_64: W: non-standard-(u|g)id \/.+ daos_server") diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 3e3e339fa77..824c7f37d50 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -15,7 +15,7 
@@ Name: daos Version: 2.5.101 -Release: 3%{?relval}%{?dist} +Release: 4%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent @@ -431,9 +431,12 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent %attr(2755,root,daos_server) %{_bindir}/daos_server %{_bindir}/daos_engine %{_bindir}/daos_metrics +%{_bindir}/ddb %{_sysconfdir}/ld.so.conf.d/daos.conf %dir %{_libdir}/daos_srv +%{_libdir}/daos_srv/libchk.so %{_libdir}/daos_srv/libcont.so +%{_libdir}/daos_srv/libddb.so %{_libdir}/daos_srv/libdtx.so %{_libdir}/daos_srv/libmgmt.so %{_libdir}/daos_srv/libobj.so @@ -549,6 +552,7 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent %{_bindir}/vea_ut %{_bindir}/vos_tests %{_bindir}/vea_stress +%{_bindir}/ddb_tests %{_bindir}/obj_ctl %{_bindir}/vos_perf @@ -583,6 +587,9 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent # No files in a shim package %changelog +* Fri Apr 05 2024 Fan Yong 2.5.101-4 +- Catastrophic Recovery + * Thu Apr 04 2024 Ashley M. Pittman 2.5.101-3 - Update pydaos install process - Add a dependency from daos-client-tests to daos-devel diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index e4671fef8ae..1bfbf8ffcf5 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -402,3 +402,9 @@ write(buf) fun:runtime/internal/syscall.Syscall6 } +{ + go ShadowSet race + Memcheck:Value8 + fun:_ZN6__tsan9ShadowSetEPNS_9RawShadowES1_S0_ + fun:racecall +}