Skip to content

Commit 5ff3b23

Browse files
authored
Remove epoch calculation code (#288)
Follow up for: #277 We've been running with epoch number replication for weeks. Time to delete the unnecessary hacky code.
1 parent d95ed3b commit 5ff3b23

File tree

7 files changed

+12
-219
lines changed

7 files changed

+12
-219
lines changed

cloud/replication_test.cc

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -455,11 +455,6 @@ size_t ReplicationTest::catchUpFollower(
455455
DB::ApplyReplicationLogRecordInfo info;
456456
size_t ret = 0;
457457
unsigned flags = DB::AR_EVICT_OBSOLETE_FILES;
458-
flags |= DB::AR_RESET_IF_EPOCH_MISMATCH;
459-
if (replicate_epoch_number_) {
460-
flags |= DB::AR_REPLICATE_EPOCH_NUM;
461-
flags |= DB::AR_CONSISTENCY_CHECK_ON_EPOCH_REPLICATION;
462-
}
463458
for (; followerSequence_ < (int)log_records_.size(); ++followerSequence_) {
464459
if (num_records && ret >= *num_records) {
465460
break;
@@ -1139,19 +1134,7 @@ TEST_F(ReplicationTest, EvictObsoleteFiles) {
11391134
static_cast_with_check<DBImpl>(follower)->TEST_table_cache()->GetUsage());
11401135
}
11411136

1142-
class ReplicationTestWithParam : public ReplicationTest,
1143-
public testing::WithParamInterface<std::pair<bool, bool>> {
1144-
public:
1145-
ReplicationTestWithParam()
1146-
: ReplicationTest() {}
1147-
1148-
void SetUp() override {
1149-
std::tie(replicate_epoch_number_, consistency_check_on_epoch_replication) =
1150-
GetParam();
1151-
}
1152-
};
1153-
1154-
TEST_P(ReplicationTestWithParam, Stress) {
1137+
TEST_F(ReplicationTest, Stress) {
11551138
std::string val;
11561139
auto leader = openLeader();
11571140
openFollower();
@@ -1232,15 +1215,6 @@ TEST_P(ReplicationTestWithParam, Stress) {
12321215
verifyNextEpochNumber();
12331216
}
12341217

1235-
INSTANTIATE_TEST_CASE_P(ReplicationTest, ReplicationTestWithParam,
1236-
::testing::ValuesIn(std::vector<std::pair<bool, bool>>{
1237-
// don't replicate epoch
1238-
{false, true},
1239-
// replicate epoch but no consistency check
1240-
{true, false},
1241-
// replicate epoch and do consistency check
1242-
{true, true}}));
1243-
12441218
TEST_F(ReplicationTest, DeleteRange) {
12451219
auto leader = openLeader();
12461220
openFollower();

db/db_impl/db_impl.cc

Lines changed: 10 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,48 +1430,18 @@ Status DBImpl::ApplyReplicationLogRecord(ReplicationLogRecord record,
14301430
if (!s.ok()) {
14311431
break;
14321432
}
1433-
if (flags & AR_REPLICATE_EPOCH_NUM) {
1434-
// replicate epoch number on follower
14351433

1436-
s = CheckNextEpochNumberConsistency(e, cfd);
1437-
if (!s.ok()) {
1438-
break;
1439-
}
1440-
1441-
auto& newFiles = e.GetNewFiles();
1442-
auto& deletedFiles = e.GetDeletedFiles();
1443-
1444-
if (flags & AR_CONSISTENCY_CHECK_ON_EPOCH_REPLICATION) {
1445-
if (deletedFiles.empty() && !newFiles.empty()) {
1446-
// Set next epoch number properly before epoch number consistency check.
1447-
// This is necessary if next_epoch_number changes during db reopen.
1448-
cfd->SetNextEpochNumber(newFiles.begin()->second.epoch_number);
1449-
}
1434+
s = CheckNextEpochNumberConsistency(e, cfd);
1435+
if (!s.ok()) {
1436+
break;
1437+
}
14501438

1451-
// do consistency check by comparing the replicated epoch number
1452-
// against inferred epoch number No need to
1453-
// `reset_next_epoch_number` here since we have already done it
1454-
s = InferEpochNumber(&e, cfd, info,
1455-
false /* reset_next_epoch_number */);
1456-
if (s.ok() && info->mismatched_epoch_num > 0) {
1457-
s = Status::Poison("epoch number consistency check fails");
1458-
}
1459-
if (!s.ok()) {
1460-
break;
1461-
}
1462-
}
1439+
auto& newFiles = e.GetNewFiles();
1440+
auto& deletedFiles = e.GetDeletedFiles();
14631441

1464-
// Maintain next epoch number on follower
1465-
if (deletedFiles.empty() && !newFiles.empty()) {
1466-
cfd->SetNextEpochNumber(newFiles.rbegin()->second.epoch_number + 1);
1467-
}
1468-
} else {
1469-
// infer epoch number on follower
1470-
s = InferEpochNumber(&e, cfd, info,
1471-
flags & AR_RESET_IF_EPOCH_MISMATCH);
1472-
if (!s.ok()) {
1473-
break;
1474-
}
1442+
// Maintain next epoch number on follower
1443+
if (deletedFiles.empty() && !newFiles.empty()) {
1444+
cfd->SetNextEpochNumber(newFiles.rbegin()->second.epoch_number + 1);
14751445
}
14761446
}
14771447
if (!s.ok()) {
@@ -1548,119 +1518,7 @@ Status DBImpl::ApplyReplicationLogRecord(ReplicationLogRecord record,
15481518
return s;
15491519
}
15501520

1551-
Status DBImpl::InferEpochNumber(VersionEdit* e, ColumnFamilyData* cfd,
1552-
ApplyReplicationLogRecordInfo* info,
1553-
bool reset_next_epoch_number) {
1554-
auto& newFiles = e->GetNewFiles();
1555-
// Epoch number calculation on the fly.
1556-
// There are two cases in which we need to calculate epoch number
1557-
// when applying `kManifestWrite`
1558-
// 1. flush which generates L0 files. epoch number is allocated
1559-
// based on `next_epoch_number` of each CF. The L0 files are sorted
1560-
// based on `largest seqno`.
1561-
// 2. compaction which merges files in lower levels to higher
1562-
// levels. epoch number = min epoch number of input files.
1563-
const auto& deletedFiles = e->GetDeletedFiles();
1564-
if (deletedFiles.empty() && !newFiles.empty()) {
1565-
// case 1: flush into L0 files. New files must be level 0
1566-
1567-
for (auto& p : newFiles) {
1568-
if (p.first != 0) {
1569-
ROCKS_LOG_ERROR(
1570-
immutable_db_options_.info_log,
1571-
"[%s] newly flushed file: %" PRIu64 " < is not at L0 but Level: %d",
1572-
cfd->GetName().c_str(), p.second.fd.GetNumber(), p.first);
1573-
return Status::Corruption("Newly flushed file is not at L0");
1574-
}
1575-
}
1576-
1577-
// sort added files by largest seqno
1578-
std::vector<FileMetaData*> added_files;
1579-
for (auto& p : newFiles) {
1580-
added_files.push_back(&p.second);
1581-
}
1582-
1583-
NewestFirstBySeqNo cmp;
1584-
std::sort(added_files.begin(), added_files.end(), cmp);
1585-
auto first_file = added_files[0];
1586-
// Rewind/advance next_epoch_number. This is necessary if next_epoch_number
1587-
// mismtaches due to db reopen.
1588-
if (first_file->epoch_number != kUnknownEpochNumber &&
1589-
first_file->epoch_number != cfd->GetNextEpochNumber() &&
1590-
reset_next_epoch_number) {
1591-
auto max_epoch_number =
1592-
cfd->current()->storage_info()->GetMaxEpochNumberOfFiles();
1593-
if (first_file->epoch_number < cfd->GetNextEpochNumber() &&
1594-
(first_file->epoch_number == max_epoch_number + 1)) {
1595-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
1596-
"[%s] rewind next_epoch_number from: %" PRIu64
1597-
" to %" PRIu64,
1598-
cfd->GetName().c_str(), cfd->GetNextEpochNumber(),
1599-
max_epoch_number + 1);
1600-
cfd->SetNextEpochNumber(max_epoch_number + 1);
1601-
} else if (first_file->epoch_number > cfd->GetNextEpochNumber() &&
1602-
(cfd->GetNextEpochNumber() == max_epoch_number + 1)) {
1603-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
1604-
"[%s] advance next_epoch_number from: %" PRIu64
1605-
" to %" PRIu64,
1606-
cfd->GetName().c_str(), cfd->GetNextEpochNumber(),
1607-
first_file->epoch_number);
1608-
cfd->SetNextEpochNumber(first_file->epoch_number);
1609-
} else {
1610-
// Not safe to rewind/advance `next_epoch_number`. This can happen
1611-
// when we do epoch recovery during db open (i.e., nodes run
1612-
// with different rocksdb versions and nodes upgrading from old version
1613-
// to new version need to recover epoch). Poison is the best we can do
1614-
return Status::Poison("Poison due to diverged next epoch number");
1615-
}
1616-
}
1617-
1618-
for (auto meta : added_files) {
1619-
auto replicated_epoch_number = meta->epoch_number;
1620-
auto inferred_epoch_number = cfd->NewEpochNumber();
1621-
if (replicated_epoch_number != inferred_epoch_number) {
1622-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
1623-
"[%s] mismatched epoch for file: %" PRIu64
1624-
"; incoming: %" PRIu64 ", calculated: %" PRIu64,
1625-
cfd->GetName().c_str(), meta->fd.GetNumber(),
1626-
replicated_epoch_number, inferred_epoch_number);
1627-
info->mismatched_epoch_num += 1;
1628-
meta->epoch_number = inferred_epoch_number;
1629-
}
1630-
}
1631-
} else if (!deletedFiles.empty() && !newFiles.empty()) {
1632-
// case 2: compaction
1633-
uint64_t min_input_epoch_number = std::numeric_limits<uint64_t>::max();
1634-
const auto& storage_info = cfd->current()->storage_info();
1635-
for (auto [level, file_number] : deletedFiles) {
1636-
auto meta = storage_info->GetFileMetaDataByNumber(file_number);
1637-
if (!meta) {
1638-
ROCKS_LOG_ERROR(immutable_db_options_.info_log,
1639-
"[%s] deleted file: %" PRIu64 " at level: %d not found",
1640-
cfd->GetName().c_str(), file_number, level);
1641-
return Status::Corruption("Deleted file not found");
1642-
}
1643-
min_input_epoch_number =
1644-
std::min(meta->epoch_number, min_input_epoch_number);
1645-
}
1646-
1647-
for (auto& p : newFiles) {
1648-
auto replicated_epoch_number = p.second.epoch_number;
1649-
if (replicated_epoch_number != min_input_epoch_number) {
1650-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
1651-
"[%s] mismatched epoch for file: %" PRIu64
1652-
"; incoming: %" PRIu64 ", calculated: %" PRIu64,
1653-
cfd->GetName().c_str(), p.second.fd.GetNumber(),
1654-
replicated_epoch_number, min_input_epoch_number);
1655-
info->mismatched_epoch_num += 1;
1656-
p.second.epoch_number = min_input_epoch_number;
1657-
}
1658-
}
1659-
}
1660-
return Status::OK();
1661-
}
1662-
1663-
Status DBImpl::CheckNextEpochNumberConsistency(VersionEdit& e, ColumnFamilyData* cfd) {
1521+
Status DBImpl::CheckNextEpochNumberConsistency(const VersionEdit& e, ColumnFamilyData* cfd) {
16641522
auto& newFiles = e.GetNewFiles();
16651523
auto& deletedFiles = e.GetDeletedFiles();
16661524

db/db_impl/db_impl.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,15 +412,9 @@ class DBImpl : public DB {
412412
ApplyReplicationLogRecordInfo* info,
413413
unsigned flags) override;
414414

415-
// Calculate epoch number on follower
416-
Status InferEpochNumber(VersionEdit* e, ColumnFamilyData* cfd,
417-
ApplyReplicationLogRecordInfo* info,
418-
bool reset_next_epoch_number);
419-
420415
// Check that replicated epoch number of newly flushed files >= cfd's next
421416
// epoch number.
422-
// TODO: make `VersionEdit` const
423-
Status CheckNextEpochNumberConsistency(VersionEdit& e, ColumnFamilyData* cfd);
417+
Status CheckNextEpochNumberConsistency(const VersionEdit& e, ColumnFamilyData* cfd);
424418
Status GetReplicationRecordDebugString(
425419
const ReplicationLogRecord& record, std::string* out) const override;
426420
Status GetPersistedReplicationSequence(std::string* out) override;

db/version_edit.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,6 @@ class VersionEdit {
504504
// Retrieve the table files added as well as their associated levels.
505505
using NewFiles = std::vector<std::pair<int, FileMetaData>>;
506506
const NewFiles& GetNewFiles() const { return new_files_; }
507-
NewFiles& GetNewFiles() { return new_files_; }
508507

509508
// Retrieve all the compact cursors
510509
using CompactCursors = std::vector<std::pair<int, InternalKey>>;

include/rocksdb/db.h

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,9 +1305,6 @@ class DB {
13051305
// families that were deleted as a result of ApplyReplicationLogRecord()
13061306
// call, if any.
13071307
std::vector<uint32_t> deleted_column_families;
1308-
1309-
// record number of mismatched epoch number found
1310-
uint64_t mismatched_epoch_num{0};
13111308
};
13121309
// ApplyReplicationLogRecord() applies the replication record provided by the
13131310
// leader's ReplicationLogListener. Info contains some useful information
@@ -1326,22 +1323,6 @@ class DB {
13261323
// REQUIRES: info needs to be provided, can't be nullptr.
13271324
enum ApplyReplicationLogRecordFlags : unsigned {
13281325
AR_EVICT_OBSOLETE_FILES = 1U << 0,
1329-
// If set, replicate epoch number instead of calculating the number when
1330-
// applying `kManifestWrite`
1331-
AR_REPLICATE_EPOCH_NUM = 1U << 1,
1332-
// If set, rewind/advance `next_epoch_number` if mismatches found.
1333-
// Rocksdb doesn't track `next_epoch_number` in manifest file. When db is
1334-
// reopened, it calculates the `next_epoch_number` based on max epoch number
1335-
// of existing live files. So it's possible for the `next_epoch_number` to
1336-
// go backwards. Following two cases are possible:
1337-
// 1. leader reopens db, causing `next_epoch_number` on leader to go
1338-
// backwards. So follower needs to rewind it.
1339-
// 2. follower reopens db, causing `next_epoch_number` on follower to go
1340-
// backwards. So follower needs to advance it
1341-
AR_RESET_IF_EPOCH_MISMATCH = 1U << 2,
1342-
1343-
// Check the consistency of epoch number during replication
1344-
AR_CONSISTENCY_CHECK_ON_EPOCH_REPLICATION = 1U << 3
13451326
};
13461327
using CFOptionsFactory = std::function<ColumnFamilyOptions(Slice)>;
13471328
virtual Status ApplyReplicationLogRecord(ReplicationLogRecord record,

include/rocksdb/status.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,6 @@ class Status {
8989
kTryAgain = 13,
9090
kCompactionTooLarge = 14,
9191
kColumnFamilyDropped = 15,
92-
// Temporary status code for rocksdb upgrade
93-
kPoison = 100,
9492
kMaxCode
9593
};
9694

@@ -179,9 +177,6 @@ class Status {
179177
static Status Corruption(SubCode msg = kNone) {
180178
return Status(kCorruption, msg);
181179
}
182-
static Status Poison(const Slice& msg, const Slice& msg2 = Slice()) {
183-
return Status(kPoison, msg, msg2);
184-
}
185180

186181
static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) {
187182
return Status(kNotSupported, msg, msg2);
@@ -317,11 +312,6 @@ class Status {
317312
return code() == kCorruption;
318313
}
319314

320-
bool IsPoison() const {
321-
MarkChecked();
322-
return code() == kPoison;
323-
}
324-
325315
// Returns true iff the status indicates a NotSupported error.
326316
bool IsNotSupported() const {
327317
MarkChecked();

util/status.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,6 @@ std::string Status::ToString() const {
132132
case kMaxCode:
133133
assert(false);
134134
break;
135-
case kPoison:
136-
type = "Poison: ";
137-
break;
138135
}
139136
char tmp[30];
140137
if (type == nullptr) {

0 commit comments

Comments
 (0)