Skip to content

Commit 87705c8

Browse files
authored
stable-25-3-1: Fix state storage replicas selection (#26370)
2 parents cafe78b + cbda30a commit 87705c8

File tree

3 files changed

+136
-33
lines changed

3 files changed

+136
-33
lines changed

ydb/core/base/statestorage.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,9 @@ void TStateStorageInfo::SelectReplicas(ui64 tabletId, TSelection *selection, ui3
7575
selection->SelectedReplicas[idx] = ringGroup.Rings[idx].SelectReplica(hash);
7676
}
7777
} else { // NToSelect < total, first - select rings with walker, then select concrete node
78-
TStateStorageRingWalker walker(hash, total);
79-
for (ui32 idx : xrange(ringGroup.NToSelect))
80-
selection->SelectedReplicas[idx] = ringGroup.Rings[walker.Next()].SelectReplica(hash);
78+
for (ui32 idx = 0; ui32 ringIdx : TStateStorageRingWalker::Select(hash, total, ringGroup.NToSelect)) {
79+
selection->SelectedReplicas[idx++] = ringGroup.Rings[ringIdx].SelectReplica(hash);
80+
}
8181
}
8282
}
8383

ydb/core/base/statestorage_ringwalker.h

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#include "defs.h"
33

44
namespace NKikimr {
5-
5+
66
static const ui32 Primes[128] = {
77
104743, 105023, 105359, 105613,
88
104759, 105031, 105361, 105619,
@@ -38,22 +38,35 @@ static const ui32 Primes[128] = {
3838
106013, 106303, 106591, 106823,
3939
};
4040

41-
// Diff view of TStateStorageRingWalker. Rows whose gutter ends in "-" are the
// removed stateful walker (constructor + Next()); rows whose gutter ends in "+"
// are the added stateless Select() that returns all ring indices at once.
class TStateStorageRingWalker {
42-
const ui32 Sz;
43-
const ui32 Delta;
44-
ui32 A;
45-
public:
46-
TStateStorageRingWalker(ui32 hash, ui32 sz)
47-
: Sz(sz)
48-
, Delta(Primes[hash % 128])
49-
, A(hash + Delta)
41+
// Select(hash, sz, nToSelect) returns a vector of nToSelect ring indices.
// The first pass reproduces the removed Next() sequence (a += delta; a % sz);
// a second pass, run only when that sequence repeated a ring, replaces the
// repeats so that the returned indices are pairwise distinct.
// NOTE(review): std::vector / std::unordered_set are used here but the only
// include visible in this hunk is "defs.h" - presumably it provides them; confirm.
struct TStateStorageRingWalker {
42+
static auto Select(ui32 hash, ui32 sz, ui32 nToSelect)
5043
{
51-
Y_DEBUG_ABORT_UNLESS(Delta > Sz);
52-
}
53-
54-
ui32 Next() {
55-
A += Delta;
56-
return (A % Sz);
44+
std::vector<ui32> rings;
45+
rings.resize(nToSelect);
46+
// Every ring index that has been assigned to some slot so far.
std::unordered_set<ui32> ringsUsed;
47+
// Step of the walk: one of the 128 table primes, picked by the hash.
const ui32 delta = Primes[hash % 128];
48+
ui32 a = hash + delta;
49+
// Same precondition the removed constructor checked (step larger than ring count).
Y_DEBUG_ABORT_UNLESS(delta > sz);
50+
// First pass: walk with the fixed step and record each visited ring.
for (ui32 i : xrange(nToSelect)) {
51+
a += delta;
52+
rings[i] = a % sz;
53+
ringsUsed.insert(rings[i]);
54+
}
55+
// Fewer distinct rings than slots means the walk hit some ring more than once.
if (ringsUsed.size() != nToSelect) {
56+
// Rings already seen during this second pass; the first occurrence of a
// ring is kept in place, only later occurrences are replaced.
std::unordered_set<ui32> duplicates;
57+
for (ui32 i : xrange(nToSelect)) {
58+
if (!duplicates.insert(rings[i]).second) {
59+
ui32 proposedRing = rings[i];
60+
// Probe forward until an unassigned index is found.
// NOTE(review): the probe wraps modulo nToSelect, not sz, so after the
// first step candidates are confined to [0, nToSelect) - confirm this
// is intended; the pinned test expectations depend on it.
while (ringsUsed.count(proposedRing) > 0) {
61+
proposedRing = (proposedRing + 1) % nToSelect;
62+
}
63+
rings[i] = proposedRing;
64+
ringsUsed.insert(proposedRing);
65+
duplicates.insert(proposedRing);
66+
}
67+
}
68+
}
69+
return rings;
5770
}
5871
};
59-
}
72+
}

ydb/core/base/statestorage_ut.cpp

Lines changed: 103 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,25 @@ struct TStateStorageInfo : public TThrRefBase {
5252
mutable ui64 Hash;
5353
};
5454

55+
// Copy of the previous stateful ring walker, added to the unit test so the
// old selection can be compared with the new one. It is referenced further
// down as NStateStorageOld::TStateStorageRingWalker, so it presumably lives in
// namespace NStateStorageOld (the namespace opening is outside this hunk).
class TStateStorageRingWalker {
56+
const ui32 Sz;
57+
const ui32 Delta;
58+
ui32 A;
59+
public:
60+
// Starts the walk at hash + Delta, where Delta is a table prime picked by the hash.
TStateStorageRingWalker(ui32 hash, ui32 sz)
61+
: Sz(sz)
62+
, Delta(Primes[hash % 128])
63+
, A(hash + Delta)
64+
{
65+
Y_DEBUG_ABORT_UNLESS(Delta > Sz);
66+
}
67+
68+
// Advances by Delta and returns the position modulo Sz. Nothing here prevents
// successive calls from returning the same index again.
ui32 Next() {
69+
A += Delta;
70+
return (A % Sz);
71+
}
72+
};
73+
5574
void TStateStorageInfo::SelectReplicas(ui64 tabletId, TSelection *selection) const {
5675
const ui32 hash = StateStorageHashFromTabletID(tabletId);
5776
const ui32 total = Rings.size();
@@ -69,7 +88,7 @@ void TStateStorageInfo::SelectReplicas(ui64 tabletId, TSelection *selection) con
6988
selection->SelectedReplicas[idx] = Rings[idx].SelectReplica(hash);
7089
}
7190
} else { // NToSelect < total, first - select rings with walker, then select concrete node
72-
TStateStorageRingWalker walker(hash, total);
91+
NStateStorageOld::TStateStorageRingWalker walker(hash, total);
7392
for (ui32 idx : xrange(NToSelect))
7493
selection->SelectedReplicas[idx] = Rings[walker.Next()].SelectReplica(hash);
7594
}
@@ -350,6 +369,11 @@ Y_UNIT_TEST_SUITE(TStateStorageConfig) {
350369
TStateStorageInfo::TSelection selection;
351370
for (ui64 tabletId = 8000000; tabletId < 9000000; ++tabletId) {
352371
info.SelectReplicas(tabletId, &selection, 0);
372+
std::unordered_set<TActorId> ids;
373+
for (ui32 i : xrange(selection.Sz)) {
374+
ids.insert(selection.SelectedReplicas[i]);
375+
}
376+
Y_ABORT_UNLESS(ids.size() == selection.Sz);
353377
Y_ABORT_UNLESS(nToSelect == selection.Sz);
354378
for (ui32 idx : xrange(nToSelect))
355379
retHash = CombineHashes<ui64>(retHash, selection.SelectedReplicas[idx].Hash());
@@ -379,31 +403,31 @@ Y_UNIT_TEST_SUITE(TStateStorageConfig) {
379403

380404
// Golden-value checks on StabilityRun(). Rows with a "-" gutter carry the
// previous expected hashes, rows with a "+" gutter the updated ones; rows with
// a plain gutter (the 3, 3, ... cases) are unchanged by this commit.
Y_UNIT_TEST(TestReplicaSelection) {
381405
UNIT_ASSERT(StabilityRun(3, 3, 1, false) == 17606246762804570019ULL);
382-
UNIT_ASSERT(StabilityRun(13, 3, 1, false) == 421354124534079828ULL);
383-
UNIT_ASSERT(StabilityRun(13, 9, 1, false) == 10581416019959162949ULL);
406+
UNIT_ASSERT(StabilityRun(13, 3, 1, false) == 6799095354188407094ULL);
407+
UNIT_ASSERT(StabilityRun(13, 9, 1, false) == 9959984117877048199ULL);
384408
UNIT_ASSERT(StabilityRun(3, 3, 1, true) == 17606246762804570019ULL);
385-
UNIT_ASSERT(StabilityRun(13, 3, 1, true) == 421354124534079828ULL);
386-
UNIT_ASSERT(StabilityRun(13, 9, 1, true) == 10581416019959162949ULL);
409+
UNIT_ASSERT(StabilityRun(13, 3, 1, true) == 6799095354188407094ULL);
410+
UNIT_ASSERT(StabilityRun(13, 9, 1, true) == 9959984117877048199ULL);
387411
}
388412

389413
// Golden-value checks on StabilityRun() with a third argument greater than 1.
// Rows with a "-" gutter carry the previous expected hashes, rows with a "+"
// gutter the updated ones; the plain-gutter (3, 3, 3, ...) rows are unchanged.
Y_UNIT_TEST(TestMultiReplicaFailDomains) {
390414
UNIT_ASSERT(StabilityRun(3, 3, 3, false) == 12043409773822600429ULL);
391-
UNIT_ASSERT(StabilityRun(13, 3, 5, false) == 3265154396592024904ULL);
392-
UNIT_ASSERT(StabilityRun(13, 9, 8, false) == 12079940289459527060ULL);
415+
UNIT_ASSERT(StabilityRun(13, 3, 5, false) == 16389704234708466102ULL);
416+
UNIT_ASSERT(StabilityRun(13, 9, 8, false) == 15827315848675537518ULL);
393417
UNIT_ASSERT(StabilityRun(3, 3, 3, true) == 7845257406715748850ULL);
394-
UNIT_ASSERT(StabilityRun(13, 3, 5, true) == 1986618578793030392ULL);
395-
UNIT_ASSERT(StabilityRun(13, 9, 8, true) == 6173011524598124144ULL);
418+
UNIT_ASSERT(StabilityRun(13, 3, 5, true) == 16411438521907095913ULL);
419+
UNIT_ASSERT(StabilityRun(13, 9, 8, true) == 5026957911653120252ULL);
396420
}
397421

398422
Y_UNIT_TEST(TestReplicaSelectionUniqueCombinations) {
399-
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 1, false), 0.000206, 1e-7);
400-
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 3, false), 0.000519, 1e-7);
423+
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 1, false), 0.000205, 1e-7);
424+
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 3, false), 0.000518, 1e-7);
401425
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 3, 1, false), 0.009091, 1e-7);
402426
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 3, 5, false), 0.045251, 1e-7);
403427
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 9, 1, false), 0.009237, 1e-7);
404428
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 9, 8, false), 0.01387, 1e-7);
405-
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 1, true), 0.000206, 1e-7);
406-
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 3, true), 0.004263, 1e-7);
429+
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 1, true), 0.000205, 1e-7);
430+
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(13, 3, 3, true), 0.004262, 1e-7);
407431
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 3, 1, true), 0.009091, 1e-7);
408432
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 3, 5, true), 0.63673, 1e-7);
409433
UNIT_ASSERT_DOUBLES_EQUAL(UniqueCombinationsRun(113, 9, 1, true), 0.009237, 1e-7);
@@ -462,6 +486,72 @@ Y_UNIT_TEST_SUITE(TStateStorageConfig) {
462486
info1.RingGroups[0].State = ERingGroupState::DISCONNECTED;
463487
UNIT_ASSERT(info1.RingGroups[0].SameConfiguration(info2.RingGroups[0]));
464488
}
489+
490+
// Regression test pinning the exact replica order chosen for one specific
// tablet id: the node ids of the five selected replicas must be 0, 2, 1, 3, 4.
Y_UNIT_TEST(Tablet72075186224040026Test) {
491+
TStateStorageInfo info;
492+
// Presumably 9 rings with 5 to select (the loop below reads 5 replicas) -
// confirm against FillStateStorageInfo, which is not visible here.
FillStateStorageInfo(&info, 9, 5, 1, false);
493+
NKikimr::TStateStorageInfo::TSelection selection;
494+
info.SelectReplicas(72075186224040026UL, &selection, 0);
495+
ui32 expected[] = {0, 2, 1, 3, 4};
496+
for (ui32 i : xrange(5)) {
497+
UNIT_ASSERT_EQUAL(selection.SelectedReplicas[i].NodeId(), expected[i]);
498+
}
499+
}
500+
501+
// Compares the new selection with the old (NStateStorageOld) one for a range
// of tablet ids near the top of the ui64 range. Where the old selection had no
// repeated replica, the new one must match it position by position; otherwise
// the number of matching positions must equal the number of distinct replicas
// the old selection produced.
Y_UNIT_TEST(NonDuplicatedNodesTest) {
502+
TStateStorageInfo info;
503+
NStateStorageOld::TStateStorageInfo oldInfo;
504+
FillStateStorageInfo(&info, 9, 5, 1, false);
505+
// Build the old-style config by hand: 9 rings, one replica each, select 5.
oldInfo.Rings.resize(9);
506+
oldInfo.NToSelect = 5;
507+
for (ui32 i : xrange(9)) {
508+
oldInfo.Rings[i].Replicas.push_back(TActorId(i, i, i, i));
509+
}
510+
// Counts tablet ids for which the old selection had 5 distinct replicas.
ui32 good = 0;
511+
for (ui64 tabletId : xrange(Max<ui64>() - 1000000UL, Max<ui64>())) {
512+
NKikimr::TStateStorageInfo::TSelection selection;
513+
NStateStorageOld::TStateStorageInfo::TSelection oldSelection;
514+
info.SelectReplicas(tabletId, &selection, 0);
515+
oldInfo.SelectReplicas(tabletId, &oldSelection);
516+
// Distinct replicas chosen by the OLD algorithm.
std::unordered_set<TActorId> nodes;
517+
for (ui32 i : xrange(5)) {
518+
nodes.insert(oldSelection.SelectedReplicas[i]);
519+
}
520+
if (nodes.size() == 5) {
521+
good++;
522+
// No repeats in the old result: the new result must be identical.
for (ui32 i : xrange(5)) {
523+
UNIT_ASSERT_EQUAL(oldSelection.SelectedReplicas[i], selection.SelectedReplicas[i]);
524+
}
525+
} else {
526+
// Old result had repeats: count positions where old and new still agree.
ui32 same = 0;
527+
for (ui32 i : xrange(5)) {
528+
if (oldSelection.SelectedReplicas[i] == selection.SelectedReplicas[i]) {
529+
same++;
530+
}
531+
}
532+
UNIT_ASSERT_EQUAL(same, nodes.size());
533+
}
534+
}
535+
// Pinned count of tablet ids in the range whose old selection was repeat-free.
UNIT_ASSERT_EQUAL(good, 999941);
536+
}
537+
538+
// Checks that, over the same tablet-id range as NonDuplicatedNodesTest, the
// current SelectReplicas never returns fewer than 5 distinct replicas.
Y_UNIT_TEST(DuplicatedNodesTest) {
539+
TStateStorageInfo info;
540+
FillStateStorageInfo(&info, 9, 5, 1, false);
541+
// Number of tablet ids whose selection contained a repeated replica.
ui32 bad = 0;
542+
for (ui64 tabletId : xrange(Max<ui64>() - 1000000UL, Max<ui64>())) {
543+
NKikimr::TStateStorageInfo::TSelection selection;
544+
info.SelectReplicas(tabletId, &selection, 0);
545+
std::unordered_set<TActorId> nodes;
546+
for (ui32 i : xrange(5)) {
547+
nodes.insert(selection.SelectedReplicas[i]);
548+
}
549+
if (nodes.size() != 5) {
550+
bad++;
551+
}
552+
}
553+
UNIT_ASSERT_EQUAL(bad, 0);
554+
}
465555
}
466556

467557
}

0 commit comments

Comments
 (0)