Skip to content

Commit b3810da

Browse files
MB-50712: Use the greedy vbmap algorithm by default
This patch changes the vbmap generation logic to use the greedy approach by default in all cases. However, it leaves the old logic in place too - it is possible to swtich back to use the old logic with the following diag/eval: ns_config:set(use_vbmap_greedy_optimization,false) Change-Id: I7e37c22984c7470a4b0004667fd323cb18bc1cc1 Reviewed-on: https://review.couchbase.org/c/ns_server/+/175370 Well-Formed: Restriction Checker Well-Formed: Build Bot <[email protected]> Tested-by: Hareen Kancharla <[email protected]> Reviewed-by: Dave Finlay <[email protected]>
1 parent 807bd6b commit b3810da

File tree

2 files changed

+117
-35
lines changed

2 files changed

+117
-35
lines changed

src/mb_map.erl

Lines changed: 114 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ promote_replicas_for_graceful_failover_for_chain(Chain, RemoveNodes) ->
6666
ChangedChain ++ RemoveNodesChain ++ Undefineds.
6767

6868
vbucket_movements_rec(AccMasters, AccReplicas, [], []) ->
69-
{AccMasters, AccReplicas};
69+
{AccReplicas, AccMasters};
7070
vbucket_movements_rec(AccMasters, AccReplicas,
7171
[[MasterSrc|_] = SrcChain | RestSrcChains],
72-
[[MasterDst|RestDst] | RestDstChains]) ->
72+
[[MasterDst|_] = DstChain | RestDstChains]) ->
7373
true = (MasterDst =/= undefined),
7474
AccMasters2 = case MasterSrc =:= MasterDst of
7575
true ->
@@ -84,7 +84,7 @@ vbucket_movements_rec(AccMasters, AccReplicas,
8484
true -> Acc;
8585
false -> Acc+1
8686
end
87-
end, AccReplicas, RestDst),
87+
end, AccReplicas, DstChain),
8888
vbucket_movements_rec(AccMasters2, AccReplicas2, RestSrcChains, RestDstChains).
8989

9090
%% returns 'score' for difference between Src and Dst map. It's a
@@ -297,8 +297,9 @@ matching_renamings_same_vbuckets_count(KeepNodesSet, CurrentTags,
297297
generate_map(Map, NumReplicas, Nodes, Options) ->
298298
Tags = proplists:get_value(tags, Options),
299299
UseOldCode = (Tags =:= undefined) andalso (NumReplicas =< 1),
300+
UseGreedy = proplists:get_bool(use_vbmap_greedy_optimization, Options),
300301

301-
case UseOldCode of
302+
case UseOldCode andalso not UseGreedy of
302303
true ->
303304
generate_map_old(Map, NumReplicas, Nodes, Options);
304305
false ->
@@ -317,17 +318,16 @@ generate_map_new(Map, NumReplicas, Nodes, Options) ->
317318
NumVBuckets = length(Map),
318319
NumSlaves = proplists:get_value(max_slaves, Options, 10),
319320
Tags = proplists:get_value(tags, Options),
321+
UseGreedy = proplists:get_bool(use_vbmap_greedy_optimization, Options),
320322

321323
MapsFromPast0 = find_matching_past_maps(Nodes, Map, Options, MapsHistory),
322324
MapsFromPast = score_maps(Map, MapsFromPast0),
323325
?log_debug("Scores for past maps:~n~p", [[S || {_, S} <- MapsFromPast]]),
324326

325327
GeneratedMaps0 =
326328
lists:append(
327-
%% vbmap itself randomizes some things internally so let's give it a
328-
%% chance
329329
[[invoke_vbmap(Map, ShuffledNodes, NumVBuckets,
330-
NumSlaves, NumReplicas, Tags) ||
330+
NumSlaves, NumReplicas, Tags, UseGreedy) ||
331331
_ <- lists:seq(1, 3)] ||
332332
ShuffledNodes <- [misc:shuffle(KeepNodes) || _ <- lists:seq(1, 3)]]),
333333

@@ -693,7 +693,7 @@ slaves([], _, _, Set) ->
693693
testnodes(NumNodes) ->
694694
[list_to_atom([$n | tl(integer_to_list(1000+N))]) || N <- lists:seq(1, NumNodes)].
695695

696-
invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->
696+
invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags, UseGreedy) ->
697697
VbmapName =
698698
case misc:is_windows() of
699699
true ->
@@ -707,22 +707,26 @@ invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->
707707

708708
try
709709
{ok, Map} = do_invoke_vbmap(VbmapPath, DiagPath, CurrentMap, Nodes,
710-
NumVBuckets, NumSlaves, NumReplicas, Tags),
710+
NumVBuckets, NumSlaves, NumReplicas, Tags,
711+
UseGreedy),
711712
Map
712713
after
713714
file:delete(DiagPath)
714715
end.
715716

716717
do_invoke_vbmap(VbmapPath, DiagPath,
717-
CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->
718+
CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags,
719+
UseGreedy) ->
718720
misc:executing_on_new_process(
719721
fun () ->
720722
do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
721-
NumVBuckets, NumSlaves, NumReplicas, Tags)
723+
NumVBuckets, NumSlaves, NumReplicas, Tags,
724+
UseGreedy)
722725
end).
723726

724727
do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
725-
NumVBuckets, NumSlaves, NumReplicas, Tags) ->
728+
NumVBuckets, NumSlaves, NumReplicas, Tags,
729+
UseGreedy) ->
726730
NumNodes = length(Nodes),
727731

728732
Args0 = ["--diag", DiagPath,
@@ -731,8 +735,41 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
731735
"--num-nodes", integer_to_list(NumNodes),
732736
"--num-slaves", integer_to_list(NumSlaves),
733737
"--num-replicas", integer_to_list(NumReplicas),
734-
"--relax-all"],
735-
Args = vbmap_tags_args(Nodes, Tags) ++ Args0,
738+
"--relax-all"] ++
739+
case UseGreedy of
740+
true ->
741+
["--greedy"];
742+
_ ->
743+
[]
744+
end,
745+
746+
MaxNodeId = length(Nodes) - 1,
747+
NodeIdList = lists:zip(Nodes, lists:seq(0, MaxNodeId)),
748+
?log_debug("Node Id Map: ~p", [NodeIdList]),
749+
750+
NodeIdMap = dict:from_list(NodeIdList),
751+
752+
IdVbMap = make_vbmap_with_node_ids(MaxNodeId, NodeIdMap, CurrentMap),
753+
754+
PrevMapFile = path_config:tempfile("prev-vbmap", ".json"),
755+
756+
ChainsWritten =
757+
case write_vbmap_to_file(IdVbMap, PrevMapFile) of
758+
ok ->
759+
?log_debug("Wrote vbmap to ~p", [PrevMapFile]),
760+
ok;
761+
Err ->
762+
?log_debug("Couldn't write to file: ~p, reason: ~p", [PrevMapFile, Err]),
763+
not_ok
764+
end,
765+
766+
Args = vbmap_tags_args(NodeIdMap, Tags) ++ Args0 ++
767+
(case ChainsWritten of
768+
ok ->
769+
["--current-map", PrevMapFile];
770+
_ ->
771+
[]
772+
end),
736773

737774
Port = erlang:open_port({spawn_executable, VbmapPath},
738775
[stderr_to_stdout, binary,
@@ -750,28 +787,45 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
750787

751788
case PortResult of
752789
{ok, Output} ->
753-
NodesMapping = dict:from_list(misc:enumerate(Nodes, 0)),
790+
IdNodeMap = dict:from_list(misc:enumerate(Nodes, 0)),
754791

755792
try
756793
Chains0 = ejson:decode(Output),
757794
Chains = lists:map(
758795
fun (Chain) ->
759-
[dict:fetch(N, NodesMapping) || N <- Chain]
796+
[dict:fetch(N, IdNodeMap) || N <- Chain]
760797
end, Chains0),
761798

762799
EffectiveNumCopies = length(hd(Chains)),
800+
S1 = vbucket_movements(CurrentMap, Chains),
801+
?log_debug("Score before simple minimization: ~p", [S1]),
763802

764803
Map0 = simple_minimize_moves(CurrentMap, Chains,
765804
EffectiveNumCopies, Nodes),
766805

806+
S2 = vbucket_movements(CurrentMap, Map0),
807+
?log_debug("Score after simple minimization: ~p", [S2]),
808+
809+
MapToUse =
810+
case S1 < S2 of
811+
true ->
812+
?log_debug("Map from vbmap better before simple
813+
minimization; using it"),
814+
Chains;
815+
_ ->
816+
?log_debug("Map better after simple minimization;
817+
using it"),
818+
Map0
819+
end,
820+
767821
Map =
768822
case EffectiveNumCopies < NumReplicas + 1 of
769823
true ->
770824
N = NumReplicas + 1 - EffectiveNumCopies,
771825
Extension = lists:duplicate(N, undefined),
772-
[Chain ++ Extension || Chain <- Map0];
826+
[Chain ++ Extension || Chain <- MapToUse];
773827
false ->
774-
Map0
828+
MapToUse
775829
end,
776830

777831
{ok, Map}
@@ -788,14 +842,31 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
788842
exit({vbmap_error, iolist_to_binary(Output)})
789843
end.
790844

791-
map_tags(Nodes, RawTags) ->
792-
{_, NodeIxMap} =
845+
map_chain(Chain, MaxNodeId, NodeIdMap) ->
846+
{ReversedChain, MaxNodeIdx1, NodeIdxMap1} =
793847
lists:foldl(
794-
fun (Node, {Ix, Acc}) ->
795-
Acc1 = dict:store(Node, Ix, Acc),
796-
{Ix + 1, Acc1}
797-
end, {0, dict:new()}, Nodes),
848+
fun(N, {ChainPart, MaxNId, NIdMap}) ->
849+
case dict:find(N, NIdMap) of
850+
{ok, Idx} ->
851+
{[Idx | ChainPart], MaxNId, NIdMap};
852+
error ->
853+
{[MaxNodeId + 1 | ChainPart], MaxNodeId + 1,
854+
dict:store(N, MaxNId + 1, NIdMap)}
855+
end
856+
end, {[], MaxNodeId, NodeIdMap}, Chain),
857+
{lists:reverse(ReversedChain), MaxNodeIdx1, NodeIdxMap1}.
798858

859+
make_vbmap_with_node_ids(MaxNodeId, NodeIdMap, CurrentMap) ->
860+
NodeIdMapWithUndefined = dict:store(undefined, -1, NodeIdMap),
861+
{ReversedChains, _, _} =
862+
lists:foldl(
863+
fun (Chain, {NewChains, MaxNId, NIdMap}) ->
864+
{Chain1, MaxNId1, NIdMap1} = map_chain(Chain, MaxNId, NIdMap),
865+
{[Chain1 | NewChains], MaxNId1, NIdMap1}
866+
end, {[], MaxNodeId, NodeIdMapWithUndefined}, CurrentMap),
867+
lists:reverse(ReversedChains).
868+
869+
map_tags(NodeIxMap, RawTags) ->
799870
{_, TagIxMap} =
800871
lists:foldl(
801872
fun (Tag, {Ix, Acc}) ->
@@ -810,16 +881,25 @@ map_tags(Nodes, RawTags) ->
810881

811882
[{dict:fetch(N, NodeIxMap), dict:fetch(T, TagIxMap)} || {N, T} <- RawTags].
812883

813-
vbmap_tags_args(Nodes, RawTags) ->
814-
case RawTags of
815-
undefined ->
816-
[];
817-
_ ->
818-
Tags = map_tags(Nodes, RawTags),
819-
TagsStrings = [?i2l(N) ++ ":" ++ ?i2l(T) || {N, T} <- Tags],
820-
TagsString = string:join(TagsStrings, ","),
821-
["--tags", TagsString]
822-
end.
884+
vbmap_tags_args(NodeIdMap, RawTags) ->
885+
case RawTags of
886+
undefined ->
887+
[];
888+
_ ->
889+
Tags = map_tags(NodeIdMap, RawTags),
890+
TagsStrings = [?i2l(N) ++ ":" ++ ?i2l(T) || {N, T} <- Tags],
891+
TagsString = string:join(TagsStrings, ","),
892+
["--tags", TagsString]
893+
end.
894+
895+
write_vbmap_to_file(VbMap, Filename) ->
896+
try
897+
BinChains = ejson:encode(VbMap),
898+
file:write_file(Filename, BinChains)
899+
catch T1:E1:S1 ->
900+
{error, {T1, E1, S1}}
901+
end.
902+
823903

824904
collect_vbmap_output(Port) ->
825905
do_collect_vbmap_output(Port, []).

src/ns_rebalancer.erl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ generate_vbucket_map_options(KeepNodes, BucketConfig) ->
8282
end,
8383

8484
Opts0 = ns_bucket:config_to_map_options(BucketConfig),
85+
UseGreedy = ns_config:read_key_fast(use_vbmap_greedy_optimization, true),
8586

8687
%% Note that we don't need to have replication_topology here (in fact as
8788
%% of today it's still returned by ns_bucket:config_to_map_options/1), but
@@ -90,7 +91,8 @@ generate_vbucket_map_options(KeepNodes, BucketConfig) ->
9091
%% wrongly believe that rebalance is needed even when the cluster is
9192
%% balanced. See MB-15543 for details.
9293
misc:update_proplist(Opts0, [{replication_topology, star},
93-
{tags, Tags}]).
94+
{tags, Tags},
95+
{use_vbmap_greedy_optimization, UseGreedy}]).
9496

9597
generate_vbucket_map(CurrentMap, KeepNodes, BucketConfig) ->
9698
Opts = generate_vbucket_map_options(KeepNodes, BucketConfig),

0 commit comments

Comments
 (0)