Skip to content

Commit 1ba4042

Browse files
authored
Fix the data loss issue caused by the wrong entry log header (#4607)
* Fix the data loss issue caused by the wrong entry log header --- # Motivation We observed numerous errors in the broker, which failed to read ledgers from the bookkeeper: although the ledger metadata still existed, the ledger data could not be read from the bookkeeper. After checking the data, we found that the entry log containing the ledger had been deleted by the bookkeeper, i.e. the bookkeeper has a data loss issue. The entry log file was deleted by the garbage collector because a wrong file header had been written to the entry log file. Here is an example showing that the header is wrong: ``` Failed to get ledgers map index from: 82.log : Not all ledgers were found in ledgers map index. expected: -1932430239 -- found: 0 -- entryLogId: 82 ``` * Add test
1 parent 37b63c9 commit 1ba4042

File tree

4 files changed

+47
-1
lines changed

4 files changed

+47
-1
lines changed

bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,10 @@ EntryLogMetadata extractEntryLogMetadataFromIndex(long entryLogId) throws IOExce
11511151
+ " -- found: " + meta.getLedgersMap().size() + " -- entryLogId: " + entryLogId);
11521152
}
11531153

1154+
if (header.ledgersCount == 0) {
1155+
throw new IOException("No ledgers map found in entryLogId " + entryLogId + ", do scan to double confirm");
1156+
}
1157+
11541158
return meta;
11551159
}
11561160

bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class EntryLoggerAllocator {
8585
// within the same JVM. All of these Bookie instances access this header
8686
// so there can be race conditions when entry logs are rolled over and
8787
// this header buffer is cleared before writing it into the new logChannel.
88+
logfileHeader.setZero(0, DefaultEntryLogger.LOGFILE_HEADER_SIZE);
8889
logfileHeader.writeBytes("BKLO".getBytes(UTF_8));
8990
logfileHeader.writeInt(DefaultEntryLogger.HEADER_CURRENT_VERSION);
9091
logfileHeader.writerIndex(DefaultEntryLogger.LOGFILE_HEADER_SIZE);

bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -842,11 +842,12 @@ protected void extractMetaFromEntryLogs() throws EntryLogMetadataMapException {
842842
continue;
843843
}
844844

845-
LOG.info("Extracting entry log meta from entryLogId: {}", entryLogId);
846845

847846
try {
848847
// Read through the entry log file and extract the entry log meta
849848
EntryLogMetadata entryLogMeta = entryLogger.getEntryLogMetadata(entryLogId, throttler);
849+
LOG.info("Extracted entry log meta from entryLogId: {}, ledgers {}",
850+
entryLogId, entryLogMeta.getLedgersMap().keys());
850851
removeIfLedgerNotExists(entryLogMeta);
851852
if (entryLogMeta.isEmpty()) {
852853
// This means the entry log is not associated with any active

bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,46 @@ public void testRecoverFromLedgersMap() throws Exception {
391391
assertEquals(120, meta.getRemainingSize());
392392
}
393393

394+
/**
 * Verifies that an entry log header which records a ledgers map offset but a ledgers count of
 * zero is not trusted: extracting the metadata from the index must fail with an IOException,
 * while getEntryLogMetadata must still return the per-ledger sizes for ledgers 1, 2 and 3.
 */
@Test
395+
public void testLedgersMapIsEmpty() throws Exception {
396+
// Add four entries: two for ledger 1 and one each for ledgers 3 and 2
397+
entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer());
398+
entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer());
399+
entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer());
400+
entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer());
401+
// NOTE(review): presumably this rolls over to a new log so that 0.log is finalized - confirm
((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID);
402+
entryLogger.close();
403+
404+
// Corrupt the header of 0.log in place. The first 8 bytes are skipped and left untouched;
// only the long and the int that follow them are overwritten.
405+
File f = new File(curDir, "0.log");
406+
RandomAccessFile raf = new RandomAccessFile(f, "rw");
407+
raf.seek(8);
408+
// Mock that there is a ledgers map offset (40) but the ledgers count is 0
409+
raf.writeLong(40);
410+
raf.writeInt(0);
411+
raf.close();
412+
413+
// Re-open the entry logger on the same directories and see which ledgers are in the log
414+
entryLogger = new DefaultEntryLogger(conf, dirsMgr);
415+
416+
// Reading the metadata from the index must be rejected with the exact message asserted below
try {
417+
entryLogger.extractEntryLogMetadataFromIndex(0L);
418+
fail("Should not be possible to recover from ledgers map index");
419+
} catch (IOException e) {
420+
assertEquals("No ledgers map found in entryLogId 0, do scan to double confirm", e.getMessage());
421+
}
422+
423+
// Public method should succeed by falling back to scanning the file
424+
EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L);
425+
LOG.info("Extracted Meta From Entry Log {}", meta);
426+
// Ledger 1 has two entries (60), ledgers 2 and 3 have one entry each (30); ledger 4 was never written
assertEquals(60, meta.getLedgersMap().get(1L));
427+
assertEquals(30, meta.getLedgersMap().get(2L));
428+
assertEquals(30, meta.getLedgersMap().get(3L));
429+
assertFalse(meta.getLedgersMap().containsKey(4L));
430+
assertEquals(120, meta.getTotalSize());
431+
assertEquals(120, meta.getRemainingSize());
432+
}
433+
394434
/**
395435
* Explicitly try to recover using the ledgers map index at the end of the entry log.
396436
*/

0 commit comments

Comments
 (0)