Skip to content

Commit d9de4c2

Browse files
authored
Implement a test for splitting shards by the follower load (#26236)
Changelog entry: Add a new test to verify splitting shards by the follower load. Changelog category: Not for changelog (changelog entry is not required). Description for reviewers: Added a new test, TSchemeShardSplitByLoad::TableSplitsByFollowerLoad, which is intended to verify that a shard splits when one of its followers is overloaded. Since splitting by the follower load is not implemented yet, the test is currently inverted: it verifies that no split happens in this case. NOTE: Once this new feature is implemented, the test will be updated to verify the new behavior.
1 parent f62910c commit d9de4c2

File tree

3 files changed

+454
-56
lines changed

3 files changed

+454
-56
lines changed

ydb/core/testlib/tablet_helpers.cpp

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,121 @@ namespace NKikimr {
10921092
runtime.GrabEdgeEvent<TEvents::TEvWakeup>(handle);
10931093
}
10941094

1095+
/**
1096+
* A special actor, which starts a tablet follower and restarts it, if needed.
1097+
*/
1098+
class TFollowerLauncher : public TActorBootstrapped<TFollowerLauncher> {
1099+
private:
1100+
ui64 TabletId;
1101+
ui32 FollowerId;
1102+
TActorId FollowerActorId;
1103+
1104+
public:
1105+
TFollowerLauncher(ui64 tabletId, ui32 follewerId)
1106+
: TabletId(tabletId)
1107+
, FollowerId(follewerId)
1108+
{
1109+
}
1110+
1111+
void Bootstrap(const TActorContext& ctx) {
1112+
CreateFollower();
1113+
1114+
LOG_INFO_S(
1115+
ctx,
1116+
NKikimrServices::HIVE,
1117+
"[Follower launcher " << SelfId()
1118+
<< "] Created follower ID " << FollowerId
1119+
<< " for tabletId " << TabletId
1120+
<< ": " << FollowerActorId
1121+
);
1122+
1123+
Become(&TThis::StateWork);
1124+
}
1125+
1126+
STFUNC(StateWork) {
1127+
switch (ev->GetTypeRewrite()) {
1128+
HFunc(TEvTablet::TEvTabletDead, Handle);
1129+
HFunc(TEvents::TEvPoison, Handle);
1130+
}
1131+
}
1132+
1133+
void Handle(TEvTablet::TEvTabletDead::TPtr& ev, const TActorContext& ctx) {
1134+
if (ev->Sender != FollowerActorId) {
1135+
LOG_INFO_S(
1136+
ctx,
1137+
NKikimrServices::HIVE,
1138+
"[Follower launcher " << SelfId()
1139+
<< "] Received EvTabletDead for tabletId " << ev->Get()->TabletID
1140+
<< ", but from an unknown actor ID, ignored: " << FollowerActorId
1141+
);
1142+
1143+
return;
1144+
}
1145+
1146+
LOG_INFO_S(
1147+
ctx,
1148+
NKikimrServices::HIVE,
1149+
"[Follower launcher " << SelfId()
1150+
<< "] Received EvTabletDead from follower ID " << FollowerId
1151+
<< " for tabletId " << TabletId
1152+
<< ": " << FollowerActorId
1153+
);
1154+
1155+
// The follower has died, start a new one
1156+
FollowerActorId = {};
1157+
CreateFollower();
1158+
1159+
LOG_INFO_S(
1160+
ctx,
1161+
NKikimrServices::HIVE,
1162+
"[Follower launcher " << SelfId()
1163+
<< "] Restarted follower ID " << FollowerId
1164+
<< " for tabletId " << TabletId
1165+
<< ": " << FollowerActorId
1166+
);
1167+
}
1168+
1169+
void Handle(TEvents::TEvPoison::TPtr& /* ev */, const TActorContext& ctx) {
1170+
if (FollowerActorId) {
1171+
LOG_INFO_S(
1172+
ctx,
1173+
NKikimrServices::HIVE,
1174+
"[Follower launcher " << SelfId()
1175+
<< "] Destroying follower ID " << FollowerId
1176+
<< " for tabletId " << TabletId
1177+
<< ": " << FollowerActorId
1178+
);
1179+
1180+
ctx.Send(FollowerActorId, new TEvents::TEvPoisonPill());
1181+
FollowerActorId = {};
1182+
};
1183+
1184+
Die(ctx);
1185+
}
1186+
1187+
private:
1188+
void CreateFollower() {
1189+
FollowerActorId = Register(
1190+
CreateTabletFollower(
1191+
SelfId(),
1192+
CreateTestTabletInfo(
1193+
TabletId,
1194+
TTabletTypes::DataShard,
1195+
DataGroupErasure
1196+
),
1197+
new TTabletSetupInfo(
1198+
&CreateDataShard,
1199+
TMailboxType::Simple,
1200+
0,
1201+
TMailboxType::Simple,
1202+
0
1203+
),
1204+
FollowerId
1205+
)
1206+
);
1207+
}
1208+
};
1209+
10951210
class TFakeHive : public TActor<TFakeHive>, public NTabletFlatExecutor::TTabletExecutedFlat {
10961211
public:
10971212
static std::function<IActor* (const TActorId &, TTabletStorageInfo*)> DefaultGetTabletCreationFunc(ui32 type) {
@@ -1240,6 +1355,34 @@ namespace NKikimr {
12401355
State->TabletIdToOwner[tabletId] = key;
12411356

12421357
LOG_INFO_S(ctx, NKikimrServices::HIVE, logPrefix << "boot OK, tablet id " << tabletId);
1358+
1359+
// After a successful creation of a data shard, need to create
1360+
// the given number of followers (if requested)
1361+
//
1362+
// NOTE: Only the simplest PartitionConfig -> FollowerCount option
1363+
// is supported here. More complex options (for example,
1364+
// FollowerCountPerDataCenter and FollowerGroups options)
1365+
// are completely ignored.
1366+
if (type == TTabletTypes::DataShard) {
1367+
const ui32 followerCount = ev->Get()->Record.GetFollowerCount();
1368+
1369+
if (followerCount) {
1370+
LOG_INFO_S(
1371+
ctx,
1372+
NKikimrServices::HIVE,
1373+
logPrefix << "DataShard created successfully (tabletId: " << tabletId
1374+
<< "), creating " << followerCount << " followers"
1375+
);
1376+
1377+
for (ui32 i = 0; i < followerCount; ++i) {
1378+
const ui32 followerId = i + 1;
1379+
1380+
it->second.FollowerLaunchers[followerId] = ctx.Register(
1381+
new TFollowerLauncher(tabletId, followerId)
1382+
);
1383+
}
1384+
}
1385+
}
12431386
} else {
12441387
LOG_ERROR_S(ctx, NKikimrServices::HIVE, logPrefix << "boot failed, status " << status);
12451388
}
@@ -1341,6 +1484,17 @@ namespace NKikimr {
13411484
TFakeHiveTabletInfo& tabletInfo = it->second;
13421485
ctx.Send(ctx.SelfID, new TEvFakeHive::TEvNotifyTabletDeleted(tabletInfo.TabletId));
13431486

1487+
// Destroy all follower actors, if any
1488+
for (const auto& [followerId, launcherActorId] : it->second.FollowerLaunchers) {
1489+
Cerr << "FAKEHIVE " << TabletID()
1490+
<< " Destroying launcher for the followerId " << followerId
1491+
<< " for tabletId " << it->second.TabletId
1492+
<< ": " << launcherActorId
1493+
<< Endl;
1494+
1495+
ctx.Send(launcherActorId, new TEvents::TEvPoison());
1496+
}
1497+
13441498
// Kill the tablet and don't restart it
13451499
TActorId bootstrapperActorId = tabletInfo.BootstrapperActorId;
13461500
ctx.Send(bootstrapperActorId, new TEvBootstrapper::TEvStandBy());

ydb/core/testlib/tablet_helpers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ namespace NKikimr {
149149
const TTabletTypes::EType Type;
150150
const ui64 TabletId;
151151
TActorId BootstrapperActorId;
152+
TMap<ui32, TActorId> FollowerLaunchers; // keyed by followerId
152153
ETabletState State = ETabletState::Unknown;
153154
TSubDomainKey ObjectDomain; // what subdomain tablet belongs to
154155

0 commit comments

Comments
 (0)