Skip to content

Commit 1fd9462

Browse files
committed
Add opt in initial check run
1 parent 5c5026d commit 1fd9462

File tree

3 files changed

+204
-2
lines changed

3 files changed

+204
-2
lines changed

deps/rabbit/priv/schema/rabbit.schema

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,6 +1646,17 @@ end}.
16461646
{datatype, string}
16471647
]}.
16481648

1649+
1650+
%% Whether to verify if this is the first time a node starts.
1651+
%% When enabled, nodes will create a marker file on first startup
1652+
%% and refuse to start if the marker exists but tables are empty.
1653+
%%
1654+
1655+
{mapping, "verify_initial_run", "rabbit.verify_initial_run", [
1656+
{default, false},
1657+
{datatype, {enum, [true, false]}}
1658+
]}.
1659+
16491660
% ==========================
16501661
% Logging section
16511662
% ==========================

deps/rabbit/src/rabbit.erl

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
%% Boot steps.
4141
-export([update_cluster_tags/0, maybe_insert_default_data/0, boot_delegate/0, recover/0,
4242
pg_local_amqp_session/0,
43-
pg_local_amqp_connection/0]).
43+
pg_local_amqp_connection/0, check_initial_run/0]).
4444

4545
-rabbit_boot_step({pre_boot, [{description, "rabbit boot start"}]}).
4646

@@ -199,10 +199,16 @@
199199
{requires, [core_initialized]},
200200
{enables, routing_ready}]}).
201201

202+
-rabbit_boot_step({initial_run_check,
203+
[{description, "check if this is the first time the node starts"},
204+
{mfa, {?MODULE, check_initial_run, []}},
205+
{requires, recovery},
206+
{enables, empty_db_check}]}).
207+
202208
-rabbit_boot_step({empty_db_check,
203209
[{description, "empty DB check"},
204210
{mfa, {?MODULE, maybe_insert_default_data, []}},
205-
{requires, recovery},
211+
{requires, initial_run_check},
206212
{enables, routing_ready}]}).
207213

208214

@@ -235,6 +241,7 @@
235241
{requires, [core_initialized, recovery]},
236242
{enables, routing_ready}]}).
237243

244+
238245
-rabbit_boot_step({pre_flight,
239246
[{description, "ready to communicate with peers and clients"},
240247
{requires, [core_initialized, recovery, routing_ready]}]}).
@@ -1151,6 +1158,44 @@ update_cluster_tags() ->
11511158
#{domain => ?RMQLOG_DOMAIN_GLOBAL}),
11521159
rabbit_runtime_parameters:set_global(cluster_tags, Tags, <<"internal_user">>).
11531160

1161+
1162+
%% Boot step guarding against an already-initialised node starting with an
%% empty database.
%%
%% The check is opt-in: it only runs when the `verify_initial_run' key of the
%% `rabbit' application environment is `true' (it defaults to `false' here).
%% When enabled:
%%
%%   * if the marker file "node_initialized.marker" is absent from the data
%%     directory, this is treated as the first start and the marker is created;
%%   * if the marker is present and `rabbit_table:needs_default_data/0' returns
%%     `false', startup proceeds normally;
%%   * if the marker is present and `rabbit_table:needs_default_data/0' returns
%%     `true', an error is logged and
%%     `{error, cluster_already_initialized_but_tables_empty}' is thrown.
-spec check_initial_run() -> 'ok' | no_return().

check_initial_run() ->
    case application:get_env(rabbit, verify_initial_run, false) of
        false ->
            %% Feature is disabled, skip the check
            ?LOG_DEBUG("Initial run verification is disabled",
                       #{domain => ?RMQLOG_DOMAIN_GLOBAL}),
            ok;
        true ->
            %% Feature is enabled, perform the check
            MarkerFile = filename:join(data_dir(), "node_initialized.marker"),
            check_initial_run_marker(MarkerFile)
    end.

%% Dispatches on whether the initialization marker already exists.
-spec check_initial_run_marker(file:filename_all()) -> 'ok' | no_return().

check_initial_run_marker(MarkerFile) ->
    case filelib:is_file(MarkerFile) of
        true  -> ensure_initialized_node_has_data();
        false -> create_initial_run_marker(MarkerFile)
    end.

%% Not the first run: refuses to continue if the tables still need default
%% data, because the marker says this node was initialised before.
-spec ensure_initialized_node_has_data() -> 'ok' | no_return().

ensure_initialized_node_has_data() ->
    case rabbit_table:needs_default_data() of
        true ->
            ?LOG_ERROR("Node has already been initialized, but database appears empty. "
                       "This could indicate data loss or a split-brain scenario.",
                       #{domain => ?RMQLOG_DOMAIN_GLOBAL}),
            throw({error, cluster_already_initialized_but_tables_empty});
        false ->
            ?LOG_INFO("Node has already been initialized, proceeding with normal startup",
                      #{domain => ?RMQLOG_DOMAIN_GLOBAL}),
            ok
    end.

%% First start: records that the node has been initialised by creating an
%% empty marker file. The `exclusive' mode makes the write fail (and the
%% match crash) if the file appeared between the existence check and now.
-spec create_initial_run_marker(file:filename_all()) -> 'ok'.

create_initial_run_marker(MarkerFile) ->
    ?LOG_INFO("First node startup detected, creating initialization marker",
              #{domain => ?RMQLOG_DOMAIN_GLOBAL}),
    ok = filelib:ensure_dir(MarkerFile),
    ok = file:write_file(MarkerFile, <<>>, [exclusive]), % Empty file.
    ok.
1198+
11541199
-spec maybe_insert_default_data() -> 'ok'.
11551200

11561201
maybe_insert_default_data() ->
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
%% This Source Code Form is subject to the terms of the Mozilla Public
2+
%% License, v. 2.0. If a copy of the MPL was not distributed with this
3+
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
%%
5+
%% Copyright (c) 2007-2024 Broadcom. All Rights Reserved. The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
6+
%%
7+
8+
%% Test suite for the verify_initial_run feature.
9+
%% This feature helps detect potential data loss scenarios by maintaining
10+
%% a marker file to track if a node has been initialized before.
11+
12+
-module(node_initial_run_SUITE).
13+
14+
-include_lib("common_test/include/ct.hrl").
15+
-include_lib("eunit/include/eunit.hrl").
16+
17+
-compile(export_all).
18+
19+
%% Common Test entry point: every test group of this suite, one per
%% metadata store.
all() ->
    [{group, Group} || Group <- [single_node_mnesia, single_node_khepri]].
24+
25+
%% Both groups run the same two test cases; only the group name (and hence
%% the metadata store selected in init_per_group/2) differs.
groups() ->
    Testcases = [verify_initial_run_disabled,
                 verify_initial_run_enabled],
    [{Group, [], Testcases} || Group <- [single_node_mnesia, single_node_khepri]].
36+
37+
%% -------------------------------------------------------------------
38+
%% Testsuite setup/teardown.
39+
%% -------------------------------------------------------------------
40+
41+
%% Suite setup: logs the environment, then returns the configuration produced
%% by the shared rabbit_ct_helpers setup steps.
init_per_suite(Config) ->
    rabbit_ct_helpers:log_environment(),
    SuiteConfig = rabbit_ct_helpers:run_setup_steps(Config),
    SuiteConfig.
44+
45+
%% Suite teardown: returns the result of the shared rabbit_ct_helpers
%% teardown steps.
end_per_suite(Config) ->
    FinalConfig = rabbit_ct_helpers:run_teardown_steps(Config),
    FinalConfig.
47+
48+
%% Group setup: a single, unclustered node whose metadata store is derived
%% from the group name and whose node name suffix is the group name.
init_per_group(Groupname, Config) ->
    GroupSettings = [{metadata_store, meta_store(Groupname)},
                     {rmq_nodes_clustered, false},
                     {rmq_nodename_suffix, Groupname},
                     {rmq_nodes_count, 1}],
    rabbit_ct_helpers:set_config(Config, GroupSettings).
55+
56+
%% Group teardown: nothing to clean up, the configuration is returned as is.
end_per_group(_Groupname, Config) ->
    Config.
58+
59+
%% Test case setup: the broker is started per test case so that the
%% `verify_initial_run' setting (enabled only for the test case that needs
%% it) is part of the configuration passed to the broker setup steps.
init_per_testcase(Testcase, Config) ->
    rabbit_ct_helpers:testcase_started(Config, Testcase),
    TestcaseConfig = maybe_enable_verify_initial_run(Config, Testcase),
    SetupSteps = rabbit_ct_broker_helpers:setup_steps() ++
                 rabbit_ct_client_helpers:setup_steps(),
    rabbit_ct_helpers:run_steps(TestcaseConfig, SetupSteps).
67+
68+
%% Test case teardown: runs the client teardown steps followed by the broker
%% teardown steps, then reports the test case as finished.
end_per_testcase(Testcase, Config) ->
    TeardownSteps = rabbit_ct_client_helpers:teardown_steps() ++
                    rabbit_ct_broker_helpers:teardown_steps(),
    Config1 = rabbit_ct_helpers:run_steps(Config, TeardownSteps),
    rabbit_ct_helpers:testcase_finished(Config1, Testcase).
75+
76+
%% -------------------------------------------------------------------
77+
%% Test cases
78+
%% -------------------------------------------------------------------
79+
80+
%% With the feature left at its default (disabled), the node must never
%% create the marker file, neither on first start nor after a restart.
verify_initial_run_disabled(Config) ->
    MarkerFile = filename:join(
                   rabbit_ct_broker_helpers:get_node_config(Config, 0, data_dir),
                   "node_initialized.marker"),

    %% The node is already running at this point: no marker expected.
    ?assertNot(filelib:is_file(MarkerFile)),

    %% A stop/start cycle must succeed...
    ok = rabbit_ct_broker_helpers:stop_node(Config, 0),
    ok = rabbit_ct_broker_helpers:start_node(Config, 0),

    %% ...and must not create the marker either.
    ?assertNot(filelib:is_file(MarkerFile)),
    ok.
93+
94+
%% With the feature enabled, the node creates a marker file on first start,
%% restarts fine while marker and schema are both present, and refuses to
%% start once the schema file is removed while the marker remains.
verify_initial_run_enabled(Config) ->
    DataDir = rabbit_ct_broker_helpers:get_node_config(Config, 0, data_dir),
    MarkerFile = filename:join(DataDir, "node_initialized.marker"),

    % Setting is enabled so marker file should be present after initial startup
    ?assert(filelib:is_file(MarkerFile)),

    % Restarting the node should be fine, as there is a marker file
    % and corresponding schema data (consistent state)
    ok = rabbit_ct_broker_helpers:stop_node(Config, 0),
    ok = rabbit_ct_broker_helpers:start_node(Config, 0),

    SchemaFile = schema_file(Config),

    ?assert(filelib:is_file(MarkerFile)),

    % Stop the node and remove the present schema to simulate data loss.
    % The deletion is asserted: if it silently failed, the node would start
    % normally and the test would fail below for a misleading reason.
    ok = rabbit_ct_broker_helpers:stop_node(Config, 0),
    ?assertEqual(ok, file:delete(SchemaFile)),

    % Node should fail to start because marker exists but schema is missing,
    % indicating potential data loss or corruption
    ?assertMatch(
       {error, {skip, "Failed to initialize RabbitMQ"}},
       rabbit_ct_broker_helpers:start_node(Config, 0)
      ),
    ok.
120+
121+
%% -------------------------------------------------------------------
122+
%% Internal helpers
123+
%% -------------------------------------------------------------------
124+
125+
%% Turns on `rabbit.verify_initial_run' for the one test case that exercises
%% the enabled behaviour; every other test case keeps the configuration
%% untouched.
maybe_enable_verify_initial_run(Config, verify_initial_run_enabled) ->
    RabbitEnv = {rabbit, [{verify_initial_run, true}]},
    rabbit_ct_helpers:merge_app_env(Config, RabbitEnv);
maybe_enable_verify_initial_run(Config, _OtherTestcase) ->
    Config.
131+
132+
%% Maps a test group name to the metadata store it runs against.
meta_store(single_node_mnesia) -> mnesia;
meta_store(single_node_khepri) -> khepri.
136+
137+
%% Returns the path of the file the test deletes to simulate data loss,
%% which depends on the metadata store configured for the current group.
schema_file(Config) ->
    DataDir = rabbit_ct_broker_helpers:get_node_config(Config, 0, data_dir),
    case rabbit_ct_helpers:get_config(Config, metadata_store) of
        mnesia ->
            filename:join(DataDir, "schema.DAT");
        khepri ->
            %% The node name is only needed to build the Khepri path.
            Node = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename),
            filename:join([DataDir, "coordination", Node, "names.dets"])
    end.

0 commit comments

Comments
 (0)