Skip to content

Commit 4a32470

Browse files
authored
AWS peer discovery: ensure consistent hostname path ordering (#14557)
* AWS peer discovery: ensure consistent hostname path ordering

  The AWS EC2 API returns networkInterfaceSet and privateIpAddressesSet in arbitrary order, causing non-deterministic hostname resolution during peer discovery. This leads to inconsistent cluster formation.

  Changes:
  - Sort network interfaces by deviceIndex (0 first for primary ENI)
  - Sort private IP addresses by primary flag (primary=true first)
  - Add debug logging to show hostname path selection and sorting results
  - Add comprehensive unit tests for sorting behavior

  The sorting ensures deviceIndex=0 and primary=true IPs are consistently selected first, making peer discovery deterministic across deployments.

* AWS peer discovery: ensure consistent hostname path ordering (address feedback on debug logs and sorting helper functions)
1 parent 668dbe2 commit 4a32470

File tree

2 files changed

+152
-14
lines changed

2 files changed

+152
-14
lines changed

deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,10 +351,12 @@ get_hostname_by_tags(Tags) ->
351351
%% Resolve the property path used to extract a hostname from an instance
%% description.
%%
%% Returns the configured `aws_hostname_path', except when that path is exactly
%% ["privateDnsName"] and `aws_use_private_ip' is `true'; in that case
%% ["privateIpAddress"] is returned instead. The chosen path is logged at debug
%% level before it is returned.
get_hostname_path() ->
    %% Read the backend configuration once so both settings are taken from the
    %% same map instead of building it twice.
    Config = ?CONFIG_MODULE:config_map(?BACKEND_CONFIG_KEY),
    UsePrivateIP = get_config_key(aws_use_private_ip, Config),
    FinalPath = case get_config_key(aws_hostname_path, Config) of
        ["privateDnsName"] when UsePrivateIP -> ["privateIpAddress"];
        Path -> Path
    end,
    ?LOG_DEBUG("AWS peer discovery using hostname path: ~tp", [FinalPath]),
    FinalPath.
358360

359361
-spec get_hostname(path(), props()) -> string().
360362
get_hostname(Path, Props) ->
@@ -371,7 +373,37 @@ get_value(Key, Props) when is_integer(Key) ->
371373
{"item", Props2} = lists:nth(Key, Props),
372374
Props2;
373375
get_value(Key, Props) ->
374-
proplists:get_value(Key, Props).
376+
Value = proplists:get_value(Key, Props),
377+
sort_ec2_hostname_path_set_members(Key, Value).
378+
379+
%% Give the members of selected EC2 "set" elements a deterministic order.
%%
%% "networkInterfaceSet" members are ordered by ascending deviceIndex and
%% "privateIpAddressesSet" members are ordered with primary addresses first.
%% Values stored under any other key, and non-list values, are returned as is.
-spec sort_ec2_hostname_path_set_members(string(), any()) -> any().
sort_ec2_hostname_path_set_members("networkInterfaceSet", Members) when is_list(Members) ->
    lists:sort(fun device_index_precedes/2, Members);
sort_ec2_hostname_path_set_members("privateIpAddressesSet", Members) when is_list(Members) ->
    lists:sort(fun primary_precedes/2, Members);
sort_ec2_hostname_path_set_members(_Key, Other) ->
    Other.

%% Ordering function: true when the left interface's deviceIndex is not
%% greater than the right one's.
device_index_precedes({"item", Left}, {"item", Right}) ->
    device_index(Left) =< device_index(Right).

%% Ordering function: true when the left address is at least as "primary" as
%% the right one (the atom true compares greater than false).
primary_precedes({"item", Left}, {"item", Right}) ->
    is_primary(Left) >= is_primary(Right).
387+
388+
%% Extract the deviceIndex of a network interface from its "attachment"
%% properties.
%%
%% The index may be given as a decimal string or as an integer. When the
%% interface has no "attachment" properties, or the attachment carries no
%% "deviceIndex", `undefined' is returned instead of crashing: in Erlang term
%% order every number compares less than any atom, so such interfaces simply
%% sort after all interfaces with a known index. A deviceIndex string that is
%% not a plain integer still fails with a badmatch.
-spec device_index(props()) -> integer() | undefined.
device_index(Interface) ->
    case proplists:get_value("attachment", Interface) of
        Attachment when is_list(Attachment) ->
            parse_device_index(proplists:get_value("deviceIndex", Attachment));
        _ ->
            undefined
    end.

%% Normalise a raw deviceIndex value to an integer (or `undefined' if absent).
-spec parse_device_index(term()) -> integer() | undefined.
parse_device_index(undefined) ->
    undefined;
parse_device_index(DeviceIndex) when is_integer(DeviceIndex) ->
    DeviceIndex;
parse_device_index(DeviceIndex) when is_list(DeviceIndex) ->
    %% Require the whole string to be consumed, e.g. "1" but not "1a".
    {Int, []} = string:to_integer(DeviceIndex),
    Int.
399+
400+
%% Tell whether a private IP address entry is flagged as primary.
%%
%% Only the exact string "true" under the "primary" key counts; any other
%% value, or a missing key, yields false.
-spec is_primary(props()) -> boolean().
is_primary(IpAddress) ->
    proplists:get_value("primary", IpAddress) =:= "true".
375407

376408
-spec get_tags() -> tags().
377409
get_tags() ->

deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl

Lines changed: 117 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ groups() ->
2323
{unit, [], [
2424
maybe_add_tag_filters,
2525
get_hostname_name_from_reservation_set,
26-
registration_support
26+
registration_support,
27+
network_interface_sorting,
28+
private_ip_address_sorting
2729
]},
2830
{lock, [], [
2931
lock_single_node,
@@ -75,12 +77,93 @@ get_hostname_name_from_reservation_set(_Config) ->
7577
?assertEqual(Expectation,
7678
rabbit_peer_discovery_aws:get_hostname_name_from_reservation_set(
7779
reservation_set(), []))
80+
end},
81+
{"from private IP DNS in network interface",
82+
fun() ->
83+
os:putenv("AWS_HOSTNAME_PATH", "networkInterfaceSet,2,privateIpAddressesSet,1,privateDnsName"),
84+
Expectation = ["ip-10-0-15-100.eu-west-1.compute.internal",
85+
"ip-10-0-16-31.eu-west-1.compute.internal"],
86+
?assertEqual(Expectation,
87+
rabbit_peer_discovery_aws:get_hostname_name_from_reservation_set(
88+
reservation_set(), []))
7889
end}]
7990
}).
8091

8192
registration_support(_Config) ->
8293
?assertEqual(false, rabbit_peer_discovery_aws:supports_registration()).
8394

95+
%% Checks that "networkInterfaceSet" members come back ordered by the
%% deviceIndex of their attachment, whatever order they were supplied in.
network_interface_sorting(_Config) ->
    %% Deliberately out of order: deviceIndex 1, 0, 2.
    %% (DescribeInstances only returns attached ENIs)
    NetworkInterfaces =
        [{"item", [{"networkInterfaceId", Id},
                   {"attachment", [{"deviceIndex", Index}]}]}
         || {Id, Index} <- [{"eni-secondary", "1"},
                            {"eni-primary", "0"},
                            {"eni-tertiary", "2"}]],

    Sorted = rabbit_peer_discovery_aws:sort_ec2_hostname_path_set_members(
               "networkInterfaceSet", NetworkInterfaces),

    %% No ENI may be dropped or duplicated by the sort
    ?assertEqual(3, length(Sorted)),

    %% deviceIndex 0 (primary) first, then 1 (secondary), then 2 (tertiary)
    SortedIds = lists:map(
                  fun({"item", ENI}) ->
                          proplists:get_value("networkInterfaceId", ENI)
                  end, Sorted),
    ?assertEqual(["eni-primary", "eni-secondary", "eni-tertiary"], SortedIds).
129+
130+
%% Checks that "privateIpAddressesSet" members come back with the primary
%% address first and the non-primary ones in their original relative order.
private_ip_address_sorting(_Config) ->
    %% The primary address is deliberately placed in the middle of the input.
    PrivateIpAddresses =
        [{"item", [{"privateIpAddress", Address},
                   {"privateDnsName", DnsName},
                   {"primary", Primary}]}
         || {Address, DnsName, Primary} <-
                [{"10.0.14.176", "ip-10-0-14-176.us-west-2.compute.internal", "false"},
                 {"10.0.12.112", "ip-10-0-12-112.us-west-2.compute.internal", "true"},
                 {"10.0.15.200", "ip-10-0-15-200.us-west-2.compute.internal", "false"}]],

    Sorted = rabbit_peer_discovery_aws:sort_ec2_hostname_path_set_members(
               "privateIpAddressesSet", PrivateIpAddresses),
    ?assertEqual(3, length(Sorted)),

    %% Primary IP (primary=true) first; non-primary IPs keep their input order
    AddressAndFlag = lists:map(
                       fun({"item", IP}) ->
                               {proplists:get_value("privateIpAddress", IP),
                                proplists:get_value("primary", IP)}
                       end, Sorted),
    ?assertEqual([{"10.0.12.112", "true"},
                  {"10.0.14.176", "false"},
                  {"10.0.15.200", "false"}],
                 AddressAndFlag).
166+
84167
lock_single_node(_Config) ->
85168
LocalNode = node(),
86169
Nodes = [LocalNode],
@@ -141,16 +224,30 @@ reservation_set() ->
141224
{"vpcId","vpc-4fe1562b"},
142225
{"networkInterfaceSet", [
143226
{"item",
144-
[{"association",
145-
[{"publicIp","203.0.113.11"},
146-
{"publicDnsName",
147-
"ec2-203-0-113-11.eu-west-1.compute.amazonaws.com"},
148-
{"ipOwnerId","amazon"}]}]},
149-
{"item",
150-
[{"association",
227+
[{"attachment", [{"deviceIndex", "1"}]},
228+
{"association",
151229
[{"publicIp","203.0.113.12"},
152230
{"publicDnsName",
153231
"ec2-203-0-113-12.eu-west-1.compute.amazonaws.com"},
232+
{"ipOwnerId","amazon"}]},
233+
{"privateIpAddressesSet", [
234+
{"item", [
235+
{"privateIpAddress", "10.0.15.101"},
236+
{"privateDnsName", "ip-10-0-15-101.eu-west-1.compute.internal"},
237+
{"primary", "false"}
238+
]},
239+
{"item", [
240+
{"privateIpAddress", "10.0.15.100"},
241+
{"privateDnsName", "ip-10-0-15-100.eu-west-1.compute.internal"},
242+
{"primary", "true"}
243+
]}
244+
]}]},
245+
{"item",
246+
[{"attachment", [{"deviceIndex", "0"}]},
247+
{"association",
248+
[{"publicIp","203.0.113.11"},
249+
{"publicDnsName",
250+
"ec2-203-0-113-11.eu-west-1.compute.amazonaws.com"},
154251
{"ipOwnerId","amazon"}]}]}]},
155252
{"privateIpAddress","10.0.16.29"}]}]}]},
156253
{"item", [{"reservationId","r-006cfdbf8d04c5f01"},
@@ -171,15 +268,24 @@ reservation_set() ->
171268
{"vpcId","vpc-4fe1562b"},
172269
{"networkInterfaceSet", [
173270
{"item",
174-
[{"association",
271+
[{"attachment", [{"deviceIndex", "0"}]},
272+
{"association",
175273
[{"publicIp","203.0.113.21"},
176274
{"publicDnsName",
177275
"ec2-203-0-113-21.eu-west-1.compute.amazonaws.com"},
178276
{"ipOwnerId","amazon"}]}]},
179277
{"item",
180-
[{"association",
278+
[{"attachment", [{"deviceIndex", "1"}]},
279+
{"association",
181280
[{"publicIp","203.0.113.22"},
182281
{"publicDnsName",
183282
"ec2-203-0-113-22.eu-west-1.compute.amazonaws.com"},
184-
{"ipOwnerId","amazon"}]}]}]},
283+
{"ipOwnerId","amazon"}]},
284+
{"privateIpAddressesSet", [
285+
{"item", [
286+
{"privateIpAddress", "10.0.16.31"},
287+
{"privateDnsName", "ip-10-0-16-31.eu-west-1.compute.internal"},
288+
{"primary", "true"}
289+
]}
290+
]}]}]},
185291
{"privateIpAddress","10.0.16.31"}]}]}]}].

0 commit comments

Comments
 (0)