Skip to content

Commit 3ffb51f

Browse files
committed
[BP] MB-51458 do not truncate long dcp connection names if the version
...of the producer node is less than 6.6.5 or one of the following: 7.0.0, 7.0.1 Change-Id: I3f1c1ca63993c6e1f408a3ed9b76aa0d14efab85 Reviewed-on: https://review.couchbase.org/c/ns_server/+/172546 Well-Formed: Restriction Checker Well-Formed: Build Bot <[email protected]> Reviewed-by: Timofey Barmin <[email protected]> Tested-by: Artem Stemkovski <[email protected]> Reviewed-on: https://review.couchbase.org/c/ns_server/+/172741
1 parent febf1a2 commit 3ffb51f

File tree

4 files changed

+169
-83
lines changed

4 files changed

+169
-83
lines changed

src/dcp_replicator.erl

Lines changed: 137 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -221,27 +221,49 @@ get_docs_estimate(Bucket, Partition, ConsumerNode) ->
221221
ns_memcached:get_dcp_docs_estimate(Bucket, Partition, Connection).
222222

223223
get_connection_name(ConsumerNode, ProducerNode, Bucket) ->
224-
ConsumerNodeList = atom_to_list(ConsumerNode),
225-
ProducerNodeList = atom_to_list(ProducerNode),
226-
CName = "replication:" ++ ProducerNodeList ++ "->" ++ ConsumerNodeList ++
227-
":" ++ Bucket,
224+
CName = "replication:" ++ atom_to_list(ProducerNode) ++ "->" ++
225+
atom_to_list(ConsumerNode) ++ ":" ++ Bucket,
228226

229227
case length(CName) =< ?MAX_DCP_CONNECTION_NAME of
230228
true ->
231229
CName;
232230
false ->
233-
%% Trim off the common prefix to shorten the names.
234-
{CNode, PNode} = trim_common_prefix(ConsumerNode, ProducerNode),
231+
case should_truncate_name(ConsumerNode, ProducerNode) of
232+
true ->
233+
get_truncated_connection_name(
234+
CName, ConsumerNode, ProducerNode, Bucket);
235+
false ->
236+
CName
237+
end
238+
end.
239+
240+
get_truncated_connection_name(LongName, ConsumerNode, ProducerNode, Bucket) ->
241+
%% Trim off the common prefix to shorten the names.
242+
{CNode, PNode} = trim_common_prefix(ConsumerNode, ProducerNode),
235243

236-
Hash = binary_to_list(base64:encode(crypto:hash(sha, CName))),
237-
Bkt = string:slice(Bucket, 0, 60),
244+
Hash = binary_to_list(base64:encode(crypto:hash(sha, LongName))),
245+
Bkt = string:slice(Bucket, 0, 60),
238246

239-
CName2 = "replication:" ++ PNode ++ "->" ++ CNode ++ ":" ++ Bkt ++
240-
":" ++ Hash,
241-
true = length(CName2) =< ?MAX_DCP_CONNECTION_NAME,
242-
CName2
247+
CName = "replication:" ++ PNode ++ "->" ++ CNode ++ ":" ++ Bkt ++
248+
":" ++ Hash,
249+
true = length(CName) =< ?MAX_DCP_CONNECTION_NAME,
250+
CName.
251+
252+
node_supports_truncated_names(Node) ->
253+
case Node == misc:this_node() of
254+
true ->
255+
true;
256+
false ->
257+
Quirks = rebalance_quirks:get_quirks([Node], long_names),
258+
not rebalance_quirks:is_enabled(
259+
dont_truncate_long_names,
260+
rebalance_quirks:get_node_quirks(Node, Quirks))
243261
end.
244262

263+
should_truncate_name(ConsumerNode, ProducerNode) ->
264+
lists:all(fun node_supports_truncated_names/1,
265+
[ConsumerNode, ProducerNode]).
266+
245267
trim_common_prefix(Consumer, Producer) ->
246268
%% Find the longest common prefix for the two nodes and chop
247269
%% it off (but not below a minimal length).
@@ -324,59 +346,108 @@ maybe_shut_consumer(Reason, Consumer) ->
324346
end.
325347

326348
-ifdef(TEST).
327-
get_connection_name_test() ->
328-
329-
%% Connection name fits into the maximum allowed
330-
331-
NodeA = 'nodeA.eng.couchbase.com',
332-
NodeB = 'nodeB.eng.couchbase.com',
333-
BucketAB = "bucket1",
334-
ConnAB = get_connection_name(NodeA, NodeB, BucketAB),
335-
?assertEqual("replication:nodeB.eng.couchbase.com->"
336-
"nodeA.eng.couchbase.com:bucket1", ConnAB),
337-
?assertEqual(true, length(ConnAB) =< ?MAX_DCP_CONNECTION_NAME),
338-
339-
%% Test where the connection name, using the previous method, won't
340-
%% fit into the maximum allowed.
341-
342-
Node1 = "[email protected]."
343-
"couchbase-new-pxxxxxxx.svc",
344-
Node2 = "[email protected]."
345-
"couchbase-new-pxxxxxxx.svc",
346-
Bucket12 = "com.yyyyyy.digital.ms.shoppingcart.shoppingcart.1234567890"
347-
"12345678901234567890",
348-
Conn12 = get_connection_name(list_to_atom(Node1), list_to_atom(Node2),
349-
Bucket12),
350-
?assertEqual("replication:1.platform-couchbase-cluster.couchb->"
351-
"0.platform-couchbase-cluster.couchb:com.yyyyyy.digital.ms."
352-
"shoppingcart.shoppingcart.123456789012:"
353-
"TYFMH5ZD2gPLOaLgcuA2VijsZvc=", Conn12),
354-
355-
%% Test that the node names aren't shortened too much (note the only
356-
%% difference is the last character).
357-
358-
Node3 = "ManyManyManyManyCommonCharacters_ns_1@platform-couchbase-cluster"
359-
"-0000",
360-
Node4 = "ManyManyManyManyCommonCharacters_ns_1@platform-couchbase-cluster"
361-
"-0001",
349+
get_connection_name_test_() ->
362350
LongBucket = "travel-sample-with-a-very-very-very-very-long-bucket-name",
363-
Conn34 = get_connection_name(list_to_atom(Node3), list_to_atom(Node4),
364-
LongBucket),
365-
?assertEqual("replication:s_1@platform-couchbase-cluster-0001->"
366-
"s_1@platform-couchbase-cluster-0000:travel-sample-with-a-"
367-
"very-very-very-very-long-bucket-name:"
368-
"D/D56MpAKsDt/0yqg6IXKBEaIcY=", Conn34),
369-
370-
%% Test with unique node names but one is much longer than the other.
371-
372-
Node5 = "AShortNodeName",
373-
Node6 = "ManyManyManyManyCommonCharacters_ns_1@platform-couchbase-cluster"
374-
"-AndEvenMoreCharactersToMakeThisNodeNameLongEnoughToRequireIt"
375-
"ToBeShortened",
376-
Conn56 = get_connection_name(list_to_atom(Node5), list_to_atom(Node6),
377-
LongBucket),
378-
?assertEqual("replication:ManyManyManyManyCommonCharacters_ns->"
379-
"AShortNodeName:travel-sample-with-a-very-very-very-very-"
380-
"long-bucket-name:A3aPD1Sik+5ZIz43M6NNTGn9XFw=", Conn56).
381-
351+
ConsumerNode = list_to_atom(
352+
"ns_1@platform-couchbase-cluster-0000."
353+
"platform-couchbase-cluster.couchbase-new-pxxxxxxx.svc"),
354+
ProducerNode = list_to_atom(
355+
"ns_1@platform-couchbase-cluster-0001."
356+
"platform-couchbase-cluster.couchbase-new-pxxxxxxx.svc"),
357+
VeryLongBucket = "com.yyyyyy.digital.ms.shoppingcart."
358+
"shoppingcart.123456789012345678901234567890",
359+
TrimmedName =
360+
"replication:1.platform-couchbase-cluster.couchb->0.platform-couchbase"
361+
"-cluster.couchb:com.yyyyyy.digital.ms.shoppingcart.shoppingcart."
362+
"123456789012:TYFMH5ZD2gPLOaLgcuA2VijsZvc=",
363+
{foreach,
364+
fun () ->
365+
meck:new(rebalance_quirks, [passthrough]),
366+
meck:new(misc, [passthrough])
367+
end,
368+
fun (_) ->
369+
meck:unload(rebalance_quirks),
370+
meck:unload(misc)
371+
end,
372+
[{"Connection name fits into the maximum allowed",
373+
fun () ->
374+
Conn = get_connection_name(
375+
'nodeA.eng.couchbase.com', 'nodeB.eng.couchbase.com',
376+
"bucket1"),
377+
?assertEqual("replication:nodeB.eng.couchbase.com->"
378+
"nodeA.eng.couchbase.com:bucket1", Conn),
379+
?assertEqual(true, length(Conn) =< ?MAX_DCP_CONNECTION_NAME)
380+
end},
381+
{"Connection name won't fit into the maximum allowed.",
382+
fun () ->
383+
meck:expect(rebalance_quirks, get_quirks,
384+
fun (_, long_names) -> [{ProducerNode, []}] end),
385+
meck:expect(misc, this_node, fun () -> ConsumerNode end),
386+
?assertEqual(
387+
TrimmedName,
388+
get_connection_name(ConsumerNode, ProducerNode,
389+
VeryLongBucket))
390+
end},
391+
{"Connection name won't fit into the maximum allowed. Pre 7.1 "
392+
"Against the node that supports trimming",
393+
fun () ->
394+
meck:expect(rebalance_quirks, get_quirks,
395+
fun (_, long_names) -> [{ProducerNode, []}] end),
396+
meck:expect(misc, this_node, fun () -> ConsumerNode end),
397+
?assertEqual(
398+
TrimmedName,
399+
get_connection_name(ConsumerNode, ProducerNode,
400+
VeryLongBucket))
401+
end},
402+
{"Connection name won't fit into the maximum allowed, but name trimming "
403+
"is not supported",
404+
fun () ->
405+
meck:expect(rebalance_quirks, get_quirks,
406+
fun (_, long_names) ->
407+
[{ProducerNode, [dont_truncate_long_names]}]
408+
end),
409+
meck:expect(misc, this_node, fun () -> ConsumerNode end),
410+
Conn = get_connection_name(ConsumerNode, ProducerNode,
411+
VeryLongBucket),
412+
?assertEqual(
413+
"replication:[email protected]"
414+
"-couchbase-cluster.couchbase-new-pxxxxxxx.svc->ns_1@platform"
415+
"-couchbase-cluster-0000.platform-couchbase-cluster.couchbase"
416+
"-new-pxxxxxxx.svc:com.yyyyyy.digital.ms.shoppingcart."
417+
"shoppingcart.123456789012345678901234567890", Conn)
418+
end},
419+
{"Test that the node names aren't shortened too much (note the only "
420+
"difference is the last character).",
421+
fun () ->
422+
Node1 = "ManyManyManyManyCommonCharacters_ns_1@platform-"
423+
"couchbase-cluster-0000",
424+
Node2 = "ManyManyManyManyCommonCharacters_ns_1@platform-"
425+
"couchbase-cluster-0001",
426+
meck:expect(rebalance_quirks, get_quirks,
427+
fun (_, long_names) -> [{Node1, []}] end),
428+
meck:expect(misc, this_node, fun () -> Node2 end),
429+
Conn = get_connection_name(
430+
list_to_atom(Node1), list_to_atom(Node2), LongBucket),
431+
?assertEqual(
432+
"replication:s_1@platform-couchbase-cluster-0001->"
433+
"s_1@platform-couchbase-cluster-0000:travel-sample-with-a-"
434+
"very-very-very-very-long-bucket-name:"
435+
"D/D56MpAKsDt/0yqg6IXKBEaIcY=", Conn)
436+
end},
437+
{"Test with unique node names but one is much longer than the other.",
438+
fun () ->
439+
Node1 = "AShortNodeName",
440+
Node2 = "ManyManyManyManyCommonCharacters_ns_1@platform-"
441+
"couchbase-cluster-AndEvenMoreCharactersToMakeThisNodeName"
442+
"LongEnoughToRequireItToBeShortened",
443+
meck:expect(rebalance_quirks, get_quirks,
444+
fun (_, long_names) -> [{Node1, []}] end),
445+
meck:expect(misc, this_node, fun () -> Node2 end),
446+
Conn = get_connection_name(
447+
list_to_atom(Node1), list_to_atom(Node2), LongBucket),
448+
?assertEqual(
449+
"replication:ManyManyManyManyCommonCharacters_ns->"
450+
"AShortNodeName:travel-sample-with-a-very-very-very-very-"
451+
"long-bucket-name:A3aPD1Sik+5ZIz43M6NNTGn9XFw=", Conn)
452+
end}]}.
382453
-endif.

src/misc.erl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,10 @@ position(E, [_|List], N) -> position(E, List, N+1).
334334
msecs_to_usecs(MilliSec) ->
335335
MilliSec * 1000.
336336

337+
%% just because we need to mock node() sometimes
338+
this_node() ->
339+
node().
340+
337341
% Returns just the node name string that's before the '@' char.
338342
% For example, returns "test" instead of "[email protected]".
339343
%

src/ns_vbucket_mover.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ init({Bucket, Nodes, OldMap, NewMap, ProgressCallback}) ->
140140
ets:new(compaction_inhibitions, [named_table, private, set]),
141141
ets:new(workers, [named_table, private, set]),
142142

143-
Quirks = rebalance_quirks:get_quirks(Nodes),
143+
Quirks = rebalance_quirks:get_quirks(Nodes, project_intact),
144144
SchedulerState = vbucket_move_scheduler:prepare(
145145
OldMap, NewMap, Quirks,
146146
menelaus_web_settings:get_rebalance_moves_per_node(),

src/rebalance_quirks.erl

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@
1212
-define(WAIT_STATUSES_TIMEOUT,
1313
ns_config:get_timeout({?MODULE, wait_statuses}, 15000)).
1414

15-
-export([get_quirks/1, is_enabled/2, get_node_quirks/2,
15+
-export([get_quirks/2, is_enabled/2, get_node_quirks/2,
1616
default_config/0, upgrade_config_project_intact_patched/0]).
1717
-export_type([quirk/0]).
1818

1919
-type quirk() :: takeover_via_orchestrator |
2020
disable_old_master |
2121
reset_replicas |
22-
trivial_moves.
22+
trivial_moves |
23+
dont_truncate_long_names.
2324

2425
%% APIs
25-
get_quirks(Nodes) ->
26-
Config = ns_config:get(),
26+
get_quirks(Nodes, Type) ->
27+
Config = ns_config:latest(),
2728
OverrideQuirks =
2829
lists:filtermap(
2930
fun (Node) ->
@@ -36,7 +37,7 @@ get_quirks(Nodes) ->
3637
end, Nodes),
3738

3839
OtherNodes = Nodes -- proplists:get_keys(OverrideQuirks),
39-
ComputedQuirks = compute_quirks(OtherNodes, Config),
40+
ComputedQuirks = compute_quirks(OtherNodes, Config, Type),
4041

4142
OverrideQuirks ++
4243
lists:map(
@@ -72,23 +73,28 @@ get_disabled_quirks(Node, Config) ->
7273
ns_config:search_node_with_default(Node, Config,
7374
disable_rebalance_quirks, []).
7475

75-
compute_quirks(Nodes, Config) ->
76+
compute_quirks(Nodes, Config, project_intact) ->
7677
Unpatched = [N || N <- Nodes,
7778
not has_project_intact_patches(N, Config)],
7879

7980
case Unpatched of
8081
[] ->
8182
[{N, []} || N <- Nodes];
8283
_ ->
83-
lists:map(fun ({Node, Status}) ->
84-
case get_version(Status) of
85-
{ok, Version} ->
86-
{Node, quirks_for_version(Version)};
87-
no_version ->
88-
exit({no_version_for_node, Node})
89-
end
90-
end, get_statuses(Nodes))
91-
end.
84+
compute_quirks(Nodes)
85+
end;
86+
compute_quirks(Nodes, _Config, long_names) ->
87+
compute_quirks(Nodes).
88+
89+
compute_quirks(Nodes) ->
90+
lists:map(fun ({Node, Status}) ->
91+
case get_version(Status) of
92+
{ok, Version} ->
93+
{Node, quirks_for_version(Version)};
94+
no_version ->
95+
exit({no_version_for_node, Node})
96+
end
97+
end, get_statuses(Nodes)).
9298

9399
get_statuses(Nodes) ->
94100
case ns_doctor:wait_statuses(Nodes, ?WAIT_STATUSES_TIMEOUT) of
@@ -120,12 +126,17 @@ all_quirks() ->
120126
[takeover_via_orchestrator,
121127
disable_old_master,
122128
reset_replicas,
123-
trivial_moves].
129+
trivial_moves,
130+
dont_truncate_long_names].
124131

125132
quirks_for_version(Version) ->
126133
[Quirk || Quirk <- all_quirks(),
127134
is_quirk_required(Quirk, Version)].
128135

136+
is_quirk_required(dont_truncate_long_names, [7, 0, X]) when X < 2 ->
137+
true;
138+
is_quirk_required(dont_truncate_long_names, Version) ->
139+
Version < [6, 6, 5];
129140
is_quirk_required(takeover_via_orchestrator, Version) ->
130141
Version < [5, 1, 0];
131142
is_quirk_required(_, _) ->

0 commit comments

Comments
 (0)