Skip to content

Commit 8f4abb0

Browse files
committed
Always update info about groups with invalid layout, add more UT
1 parent 7c04a37 commit 8f4abb0

File tree

4 files changed

+45
-26
lines changed

4 files changed

+45
-26
lines changed

ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
2525
}
2626
}
2727

28-
void CreateEnv(std::unique_ptr<TEnvironmentSetup>& env, std::vector<TNodeLocation>& locations) {
29-
TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc;
28+
void CreateEnv(std::unique_ptr<TEnvironmentSetup>& env, std::vector<TNodeLocation>& locations,
29+
TBlobStorageGroupType groupType) {
3030
const ui32 numNodes = locations.size();
3131

3232
env.reset(new TEnvironmentSetup({
@@ -37,27 +37,30 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
3737

3838
const ui32 disksPerNode = 1;
3939
const ui32 slotsPerDisk = 3;
40+
41+
// Ensure that the sanitizer doesn't send requests to initially allocated groups
42+
env->Runtime->FilterFunction = CatchSanitizeRequests;
4043
env->CreateBoxAndPool(disksPerNode, numNodes * disksPerNode * slotsPerDisk / 9);
4144
}
4245

43-
Y_UNIT_TEST(Test3dc) {
46+
NActorsInterconnect::TNodeLocation LocationGenerator(ui32 dc, ui32 rack, ui32 unit) {
47+
NActorsInterconnect::TNodeLocation proto;
48+
proto.SetDataCenter(ToString(dc));
49+
proto.SetRack(ToString(rack));
50+
proto.SetUnit(ToString(unit));
51+
return proto;
52+
}
53+
54+
void Test(TBlobStorageGroupType groupType, ui32 dcs, ui32 racks) {
4455
std::vector<TNodeLocation> locations;
45-
TLocationGenerator locationGenerator = [](ui32 dc, ui32 rack, ui32 unit) {
46-
NActorsInterconnect::TNodeLocation proto;
47-
proto.SetDataCenter(ToString(dc));
48-
proto.SetRack(ToString(rack));
49-
proto.SetUnit(ToString(unit));
50-
return proto;
51-
};
5256

53-
MakeLocations(locations, 3, 5, 1, locationGenerator);
57+
MakeLocations(locations, dcs, racks, 1, LocationGenerator);
5458
std::unique_ptr<TEnvironmentSetup> env;
55-
CreateEnv(env, locations);
5659

57-
TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc;
58-
TGroupGeometryInfo geom = CreateGroupGeometry(groupType);
60+
CreateEnv(env, locations, groupType);
61+
env->Sim(TDuration::Minutes(3));
5962

60-
env->Runtime->FilterFunction = CatchSanitizeRequests;
63+
TGroupGeometryInfo geom = CreateGroupGeometry(groupType);
6164

6265
TString error;
6366
auto cfg = env->FetchBaseConfig();
@@ -86,6 +89,18 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
8689
UNIT_ASSERT_C(CheckBaseConfigLayout(geom, cfg, true, error), error);
8790
}
8891

92+
Y_UNIT_TEST(Test3dc) {
93+
Test(TBlobStorageGroupType::ErasureMirror3dc, 3, 5);
94+
}
95+
96+
Y_UNIT_TEST(TestBlock4Plus2) {
97+
Test(TBlobStorageGroupType::Erasure4Plus2Block, 1, 12);
98+
}
99+
100+
Y_UNIT_TEST(TestMirror3of4) {
101+
Test(TBlobStorageGroupType::ErasureMirror3of4, 1, 12);
102+
}
103+
89104
TString PrintGroups(TBlobStorageGroupType groupType, const NKikimrBlobStorage::TBaseConfig& cfg,
90105
std::vector<TNodeLocation> locations) {
91106
TGroupGeometryInfo geom = CreateGroupGeometry(groupType);
@@ -137,6 +152,7 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
137152
}
138153

139154
void TestMultipleRealmsOccupation(bool allowMultipleRealmsOccupation) {
155+
TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc;
140156
std::vector<TNodeLocation> locations;
141157
TLocationGenerator locationGenerator = [](ui32 dc, ui32 rack, ui32 unit) {
142158
NActorsInterconnect::TNodeLocation proto;
@@ -152,9 +168,8 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
152168
};
153169
MakeLocations(locations, 4, 5, 1, locationGenerator);
154170
std::unique_ptr<TEnvironmentSetup> env;
155-
CreateEnv(env, locations);
171+
CreateEnv(env, locations, groupType);
156172

157-
TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc;
158173
TGroupGeometryInfo geom = CreateGroupGeometry(groupType);
159174

160175
env->Runtime->FilterFunction = CatchSanitizeRequests;

ydb/core/mind/bscontroller/impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
15121512
private:
15131513
TString InstanceId;
15141514
std::shared_ptr<std::atomic_uint64_t> SelfHealUnreassignableGroups = std::make_shared<std::atomic_uint64_t>();
1515+
std::shared_ptr<std::atomic_uint64_t> GroupLayoutSanitizerInvalidGroups = std::make_shared<std::atomic_uint64_t>();
15151516
TMaybe<TActorId> MigrationId;
15161517
TVSlots VSlots; // ordering is important
15171518
TPDisks PDisks; // ordering is important

ydb/core/mind/bscontroller/self_heal.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ namespace NKikimr::NBsController {
284284
bool DonorMode;
285285
THostRecordMap HostRecords;
286286
std::shared_ptr<TControlWrapper> EnableSelfHealWithDegraded;
287+
std::shared_ptr<std::atomic_uint64_t> GroupsWithInvalidLayoutCounter;
287288

288289
using TTopologyDescr = std::tuple<TBlobStorageGroupType::EErasureSpecies, ui32, ui32, ui32>;
289290
THashMap<TTopologyDescr, std::shared_ptr<TBlobStorageGroupInfo::TTopology>> Topologies;
@@ -296,14 +297,16 @@ namespace NKikimr::NBsController {
296297
public:
297298
TSelfHealActor(ui64 tabletId, std::shared_ptr<std::atomic_uint64_t> unreassignableGroups, THostRecordMap hostRecords,
298299
bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode,
299-
std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded)
300+
std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded,
301+
std::shared_ptr<std::atomic_uint64_t> groupsWithInvalidLayoutCounter)
300302
: TabletId(tabletId)
301303
, UnreassignableGroups(std::move(unreassignableGroups))
302304
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled)
303305
, AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation)
304306
, DonorMode(donorMode)
305307
, HostRecords(std::move(hostRecords))
306308
, EnableSelfHealWithDegraded(std::move(enableSelfHealWithDegraded))
309+
, GroupsWithInvalidLayoutCounter(std::move(groupsWithInvalidLayoutCounter))
307310
{}
308311

309312
void Bootstrap(const TActorId& parentId) {
@@ -318,17 +321,16 @@ namespace NKikimr::NBsController {
318321

319322
void Handle(TEvControllerUpdateSelfHealInfo::TPtr& ev) {
320323
if (const auto& setting = ev->Get()->GroupLayoutSanitizerEnabled) {
321-
bool previousSetting = std::exchange(GroupLayoutSanitizerEnabled, *setting);
322-
if (!previousSetting && GroupLayoutSanitizerEnabled) {
323-
UpdateLayoutInformationForAllGroups();
324-
}
324+
std::exchange(GroupLayoutSanitizerEnabled, *setting);
325325
}
326+
326327
if (const auto& setting = ev->Get()->AllowMultipleRealmsOccupation) {
327328
bool previousSetting = std::exchange(AllowMultipleRealmsOccupation, *setting);
328329
if (previousSetting != AllowMultipleRealmsOccupation) {
329330
UpdateLayoutInformationForAllGroups();
330331
}
331332
}
333+
332334
if (const auto& setting = ev->Get()->DonorMode) {
333335
DonorMode = *setting;
334336
}
@@ -345,9 +347,7 @@ namespace NKikimr::NBsController {
345347

346348
g.Content = std::move(*data);
347349

348-
if (GroupLayoutSanitizerEnabled) {
349-
UpdateGroupLayoutInformation(g);
350-
}
350+
UpdateGroupLayoutInformation(g);
351351

352352
ui32 numFailRealms = 0;
353353
ui32 numFailDomainsPerFailRealm = 0;
@@ -500,6 +500,7 @@ namespace NKikimr::NBsController {
500500
}
501501
}
502502

503+
GroupsWithInvalidLayoutCounter->store(GroupsWithInvalidLayout.Size());
503504
UnreassignableGroups->store(counter);
504505
}
505506

@@ -899,7 +900,7 @@ namespace NKikimr::NBsController {
899900
IActor *TBlobStorageController::CreateSelfHealActor() {
900901
Y_ABORT_UNLESS(HostRecords);
901902
return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled,
902-
AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded);
903+
AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded, GroupLayoutSanitizerInvalidGroups);
903904
}
904905

905906
void TBlobStorageController::InitializeSelfHealState() {
@@ -1159,6 +1160,7 @@ namespace NKikimr::NBsController {
11591160
);
11601161

11611162
TabletCounters->Simple()[NBlobStorageController::COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS] = SelfHealUnreassignableGroups->load();
1163+
TabletCounters->Simple()[NBlobStorageController::COUNTER_GROUP_LAYOUT_SANITIZER_INVALID_GROUPS] = GroupLayoutSanitizerInvalidGroups->load();
11621164

11631165
Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateSelfHealCounters);
11641166
}

ydb/core/protos/counters_bs_controller.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enum ESimpleCounters {
2828
COUNTER_DISK_SCRUB_CUR_DISKS = 18 [(CounterOpts) = {Name: "CurrentlyScrubbedDisks"}];
2929
COUNTER_DISK_SCRUB_CUR_GROUPS = 19 [(CounterOpts) = {Name: "CurrentlyScrubbedGroups"}];
3030
COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS = 20 [(CounterOpts) = {Name: "SelfHealUnreassignableGroups"}];
31+
COUNTER_GROUP_LAYOUT_SANITIZER_INVALID_GROUPS = 21 [(CounterOpts) = {Name: "GroupLayoutSanitizerInvlaidGroups"}];
3132
}
3233

3334
enum ECumulativeCounters {

0 commit comments

Comments
 (0)