2424 * THE SOFTWARE.
2525 */
2626
27- #include < boost/lexical_cast.hpp>
2827#include < algorithm>
28+ #include < boost/lexical_cast.hpp>
2929#include < cstdint>
3030#include < ostream>
3131
3939#include " utils/flags.h"
4040#include " utils/fmt_logging.h"
4141
42- // There is an option FLAGS_max_replicas_in_group which restricts the max replica count of the whole
43- // cluster. It's a cluster-level option. However, now that it's allowed to update the replication
44- // factor of each table, this cluster-level option should be replaced.
42+ // There is an option FLAGS_max_replicas_in_group which restricts the max
43+ // replica count of the whole cluster. It's a cluster-level option. However, now
44+ // that it's allowed to update the replication factor of each table, this
45+ // cluster-level option should be replaced.
4546//
46- // Conceptually FLAGS_max_replicas_in_group is the total number of alive and dropped replicas. Its
47- // default value is 4. For a table that has replication factor 3, that FLAGS_max_replicas_in_group
48- // is set to 4 means 3 alive replicas plus a dropped replica.
47+ // Conceptually FLAGS_max_replicas_in_group is the total number of alive and
48+ // dropped replicas. Its default value is 4. For a table that has replication
49+ // factor 3, that FLAGS_max_replicas_in_group is set to 4 means 3 alive replicas
50+ // plus a dropped replica.
4951//
50- // FLAGS_max_replicas_in_group can also be loaded from configuration file, which means its default
51- // value will be overridden. The value of FLAGS_max_replicas_in_group will be assigned to another
52- // static variable `MAX_REPLICA_COUNT_IN_GRROUP`, whose default value is also 4.
52+ // FLAGS_max_replicas_in_group can also be loaded from configuration file, which
53+ // means its default value will be overridden. The value of
54+ // FLAGS_max_replicas_in_group will be assigned to another static variable
55+ // `MAX_REPLICA_COUNT_IN_GRROUP`, whose default value is also 4.
5356//
54- // For unit tests, `MAX_REPLICA_COUNT_IN_GRROUP` is set to the default value 4; for production
55- // environments, `MAX_REPLICA_COUNT_IN_GRROUP` is set to 3 since FLAGS_max_replicas_in_group is
56- // configured as 3 in `.ini` file.
57+ // For unit tests, `MAX_REPLICA_COUNT_IN_GRROUP` is set to the default value 4;
58+ // for production environments, `MAX_REPLICA_COUNT_IN_GRROUP` is set to 3 since
59+ // FLAGS_max_replicas_in_group is configured as 3 in `.ini` file.
5760//
58- // Since the cluster-level option FLAGS_max_replicas_in_group contains the alive and dropped
59- // replicas, we can use the replication factor of each table as the number of alive replicas, and
60- // introduce another option FLAGS_max_reserved_dropped_replicas representing the max reserved number
61+ // Since the cluster-level option FLAGS_max_replicas_in_group contains the alive
62+ // and dropped replicas, we can use the replication factor of each table as the
63+ // number of alive replicas, and introduce another option
64+ // FLAGS_max_reserved_dropped_replicas representing the max reserved number
6165// allowed for dropped replicas.
6266//
63- // If FLAGS_max_reserved_dropped_replicas is set to 1, there is at most one dropped replicas
64- // reserved, which means, once the number of alive replicas reaches max_replica_count, at most one
65- // dropped replica can be reserved and others will be eliminated; If
66- // FLAGS_max_reserved_dropped_replicas is set to 0, however, none of dropped replicas can be
67- // reserved.
67+ // If FLAGS_max_reserved_dropped_replicas is set to 1, there is at most one
68+ // dropped replicas reserved, which means, once the number of alive replicas
69+ // reaches max_replica_count, at most one dropped replica can be reserved and
70+ // others will be eliminated; If FLAGS_max_reserved_dropped_replicas is set to
71+ // 0, however, none of dropped replicas can be reserved.
6872//
6973// To be consistent with FLAGS_max_replicas_in_group, default value of
70- // FLAGS_max_reserved_dropped_replicas is set to 1 so that the unit tests can be passed. For
71- // production environments, it should be set to 0.
74+ // FLAGS_max_reserved_dropped_replicas is set to 1 so that the unit tests can be
75+ // passed. For production environments, it should be set to 0.
7276DSN_DEFINE_uint32 (meta_server,
7377 max_reserved_dropped_replicas,
7478 1 ,
@@ -121,7 +125,8 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count)
121125 pc.partition_flags = 0 ;
122126 pc.max_replica_count = max_replica_count;
123127
124- LOG_INFO (" construct for ({}), select {} as primary, ballot({}), committed_decree({}), "
128+ LOG_INFO (" construct for ({}), select {} as primary, ballot({}), "
129+ " committed_decree({}), "
125130 " prepare_decree({})" ,
126131 pid,
127132 server.node ,
@@ -131,12 +136,15 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count)
131136
132137 drop_list.pop_back ();
133138
134- // we put max_replica_count-1 recent replicas to last_drops, in case of the DDD-state when the
135- // only primary dead
136- // when add node to pc.last_drops, we don't remove it from our cc.drop_list
137- CHECK (pc.hp_last_drops .empty (), " last_drops of partition({}) must be empty" , pid);
139+ // we put max_replica_count-1 recent replicas to last_drops, in case of the
140+ // DDD-state when the only primary dead when add node to pc.last_drops, we
141+ // don't remove it from our cc.drop_list
142+ std::vector<host_port> last_drops;
143+ GET_HOST_PORTS (pc, last_drops, last_drops);
144+ CHECK (last_drops.empty (), " last_drops of partition({}) must be empty" , pid);
138145 for (auto iter = drop_list.rbegin (); iter != drop_list.rend (); ++iter) {
139- if (pc.hp_last_drops .size () + 1 >= max_replica_count) {
146+ // hp_last_drops is added in the steps bellow.
147+ if (last_drops.size () + 1 >= max_replica_count) {
140148 break ;
141149 }
142150 // similar to cc.drop_list, pc.last_drop is also a stack structure
@@ -206,10 +214,11 @@ void proposal_actions::track_current_learner(const dsn::host_port &node, const r
206214
207215 if (info.status == partition_status::PS_ERROR ||
208216 info.status == partition_status::PS_INACTIVE) {
209- // if we've collected inforamtions for the learner, then it claims it's down
210- // we will treat the learning process failed
217+ // if we've collected inforamtions for the learner, then it claims
218+ // it's down we will treat the learning process failed
211219 if (current_learner.ballot != invalid_ballot) {
212- LOG_INFO (" {}: a learner's is down to status({}), perhaps learn failed" ,
220+ LOG_INFO (" {}: a learner's is down to status({}), perhaps learn "
221+ " failed" ,
213222 info.pid ,
214223 dsn::enum_to_string (info.status ));
215224 learning_progress_abnormal_detected = true ;
@@ -223,15 +232,17 @@ void proposal_actions::track_current_learner(const dsn::host_port &node, const r
223232 current_learner.last_prepared_decree > info.last_prepared_decree ) {
224233
225234 // TODO: need to add a metric here.
226- LOG_WARNING (" {}: learner({})'s progress step back, please trace this carefully" ,
235+ LOG_WARNING (" {}: learner({})'s progress step back, please "
236+ " trace this carefully" ,
227237 info.pid ,
228238 node);
229239 }
230240
231- // NOTICE: the flag may be abormal currently. it's balancer's duty to make use of the
232- // abnormal flag and decide whether to cancel the proposal.
233- // if the balancer try to give the proposal another chance, or another learning round
234- // starts before the balancer notice it, let's just treat it normal again.
241+ // NOTICE: the flag may be abormal currently. it's balancer's duty
242+ // to make use of the abnormal flag and decide whether to cancel the
243+ // proposal. if the balancer try to give the proposal another
244+ // chance, or another learning round starts before the balancer
245+ // notice it, let's just treat it normal again.
235246 learning_progress_abnormal_detected = false ;
236247 current_learner = info;
237248 }
@@ -304,8 +315,8 @@ void config_context::cancel_sync()
304315
305316void config_context::check_size ()
306317{
307- // when add learner, it is possible that replica_count > max_replica_count, so we
308- // need to remove things from dropped only when it's not empty.
318+ // when add learner, it is possible that replica_count > max_replica_count,
319+ // so we need to remove things from dropped only when it's not empty.
309320 while (replica_count (*pc) + dropped.size () >
310321 pc->max_replica_count + FLAGS_max_reserved_dropped_replicas &&
311322 !dropped.empty ()) {
@@ -378,7 +389,8 @@ int config_context::collect_drop_replica(const host_port &node, const replica_in
378389 iter = find_from_dropped (node);
379390 if (iter == dropped.end ()) {
380391 CHECK (!in_dropped,
381- " adjust position of existing node({}) failed, this is a bug, partition({})" ,
392+ " adjust position of existing node({}) failed, this is a bug, "
393+ " partition({})" ,
382394 node,
383395 pc->pid );
384396 return -1 ;
@@ -392,7 +404,8 @@ bool config_context::check_order()
392404 return true ;
393405 for (unsigned int i = 0 ; i < dropped.size () - 1 ; ++i) {
394406 if (dropped_cmp (dropped[i], dropped[i + 1 ]) > 0 ) {
395- LOG_ERROR (" check dropped order for gpid({}) failed, [{},{},{},{},{}@{}] vs "
407+ LOG_ERROR (" check dropped order for gpid({}) failed, "
408+ " [{},{},{},{},{}@{}] vs "
396409 " [{},{},{},{},{}@{}]" ,
397410 pc->pid ,
398411 dropped[i].node ,
@@ -457,7 +470,8 @@ void config_context::adjust_proposal(const host_port &node, const replica_info &
457470 lb_actions.track_current_learner (node, info);
458471}
459472
460- bool config_context::get_disk_tag (const host_port &node, /* out*/ std::string &disk_tag) const
473+ bool config_context::get_disk_tag (const host_port &node,
474+ /* out*/ std::string &disk_tag) const
461475{
462476 auto iter = find_from_serving (node);
463477 if (iter == serving.end ()) {
@@ -495,12 +509,14 @@ void app_state_helper::reset_manual_compact_status()
495509 }
496510}
497511
498- bool app_state_helper::get_manual_compact_progress (/* out*/ int32_t &progress) const
512+ bool app_state_helper::get_manual_compact_progress (
513+ /* out*/ int32_t &progress) const
499514{
500515 int32_t total_replica_count = owner->partition_count * owner->max_replica_count ;
501516 CHECK_GT_MSG (total_replica_count,
502517 0 ,
503- " invalid app metadata, app({}), partition_count({}), max_replica_count({})" ,
518+ " invalid app metadata, app({}), partition_count({}), "
519+ " max_replica_count({})" ,
504520 owner->app_name ,
505521 owner->partition_count ,
506522 owner->max_replica_count );
@@ -538,7 +554,8 @@ app_state::app_state(const app_info &info) : app_info(info), helpers(new app_sta
538554 CLEAR_IP_AND_HOST_PORT (pc, secondaries);
539555 CLEAR_IP_AND_HOST_PORT (pc, last_drops);
540556
541- // TODO(yujingwei): use marco simplify the code, and the logical may should change
557+ // TODO(yujingwei): use marco simplify the code, and the logical may should
558+ // change
542559 pc.__set_hp_secondaries ({});
543560 pc.__set_hp_last_drops ({});
544561
0 commit comments