Skip to content

Commit f562856

Browse files
committed
Merge bitcoin/bitcoin#27866: blockstorage: Return on fatal flush errors
d8041d4 blockstorage: Return on fatal undo file flush error (TheCharlatan)
f0207e0 blockstorage: Return on fatal block file flush error (TheCharlatan)
5671c15 blockstorage: Mark FindBlockPos as nodiscard (TheCharlatan)

Pull request description:

The goal of this PR is to establish that fatal blockstorage flush errors should be treated as errors at their call site.

Prior to this patch, `FlushBlockFile` could fail without `Chainstate::FlushStateToDisk` returning, leading to a potential write from `WriteBlockIndexDB` that may refer to a block that is not fully flushed to disk yet. By returning if either `FlushUndoFile` or `FlushBlockFile` fails, we ensure that no further write operations take place that may lead to an inconsistent database when crashing. Add `[[nodiscard]]` annotations to them so that their return values are not ignored in the future.

Functions that call either `FlushUndoFile` or `FlushBlockFile` need to handle these extra abort cases properly. Since `Chainstate::FlushStateToDisk` already produces an abort error in case of `WriteBlockIndexDB` failing, no extra logic for functions calling `Chainstate::FlushStateToDisk` is required. Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called by `FindBlockPos`, while `FlushUndoFile` is only called by `FlushBlockFile` and `WriteUndoDataForBlock`. For both these cases, the flush error is not bubbled up further. Instead, the error is logged and a comment explains why bubbling up an error would be less desirable in these cases.

---

This pull request is part of a larger effort towards improving the shutdown / abort / fatal error handling in validation code. It is a first step towards implementing proper fatal error return type enforcement, similar to what theuni proposed in this pull request comment (bitcoin/bitcoin#27711). For ease of review of these critical changes, a first step would be checking that `AbortNode` leads to early and error-conveying returns at its call site. Further work for enforcing returns when `AbortNode` is called is done in bitcoin/bitcoin#27862.

ACKs for top commit:
stickies-v: re-ACK d8041d4
ryanofsky: Code review ACK d8041d4

Tree-SHA512: 47ade9b873b15e567c8f60ca538d5a0daf32163e1031be3212a3a45eb492b866664b225f2787c9e40f3e0c089140157d8fd1039abc00c7bdfeec1b52ecd7e219
2 parents d18a8f6 + d8041d4 commit f562856

File tree

3 files changed

+47
-10
lines changed

3 files changed

+47
-10
lines changed

src/node/blockstorage.cpp

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -651,34 +651,43 @@ bool BlockManager::UndoReadFromDisk(CBlockUndo& blockundo, const CBlockIndex& in
651651
return true;
652652
}
653653

654-
void BlockManager::FlushUndoFile(int block_file, bool finalize)
654+
bool BlockManager::FlushUndoFile(int block_file, bool finalize)
655655
{
656656
FlatFilePos undo_pos_old(block_file, m_blockfile_info[block_file].nUndoSize);
657657
if (!UndoFileSeq().Flush(undo_pos_old, finalize)) {
658658
m_opts.notifications.flushError("Flushing undo file to disk failed. This is likely the result of an I/O error.");
659+
return false;
659660
}
661+
return true;
660662
}
661663

662-
void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
664+
bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
663665
{
666+
bool success = true;
664667
LOCK(cs_LastBlockFile);
665668

666669
if (m_blockfile_info.size() < 1) {
667670
// Return if we haven't loaded any blockfiles yet. This happens during
668671
// chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which
669672
// then calls FlushStateToDisk()), resulting in a call to this function before we
670673
// have populated `m_blockfile_info` via LoadBlockIndexDB().
671-
return;
674+
return true;
672675
}
673676
assert(static_cast<int>(m_blockfile_info.size()) > m_last_blockfile);
674677

675678
FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize);
676679
if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) {
677680
m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error.");
681+
success = false;
678682
}
679683
// we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks,
680684
// e.g. during IBD or a sync after a node going offline
681-
if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo);
685+
if (!fFinalize || finalize_undo) {
686+
if (!FlushUndoFile(m_last_blockfile, finalize_undo)) {
687+
success = false;
688+
}
689+
}
690+
return success;
682691
}
683692

684693
uint64_t BlockManager::CalculateCurrentUsage()
@@ -771,7 +780,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne
771780
if (!fKnown) {
772781
LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString());
773782
}
774-
FlushBlockFile(!fKnown, finalize_undo);
783+
784+
// Do not propagate the return code. The flush concerns a previous block
785+
// and undo file that has already been written to. If a flush fails
786+
// here, and we crash, there is no expected additional block data
787+
// inconsistency arising from the flush failure here. However, the undo
788+
// data may be inconsistent after a crash if the flush is called during
789+
// a reindex. A flush error might also leave some of the data files
790+
// untrimmed.
791+
if (!FlushBlockFile(!fKnown, finalize_undo)) {
792+
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning,
793+
"Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n",
794+
m_last_blockfile, !fKnown, finalize_undo, nFile);
795+
}
775796
m_last_blockfile = nFile;
776797
m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking.
777798
}
@@ -862,7 +883,14 @@ bool BlockManager::WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValid
862883
// with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in
863884
// the FindBlockPos function
864885
if (_pos.nFile < m_last_blockfile && static_cast<uint32_t>(block.nHeight) == m_blockfile_info[_pos.nFile].nHeightLast) {
865-
FlushUndoFile(_pos.nFile, true);
886+
// Do not propagate the return code, a failed flush here should not
887+
// be an indication for a failed write. If it were propagated here,
888+
// the caller would assume the undo data not to be written, when in
889+
// fact it is. Note though, that a failed flush might leave the data
890+
// file untrimmed.
891+
if (!FlushUndoFile(_pos.nFile, true)) {
892+
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, "Failed to flush undo file %05i\n", _pos.nFile);
893+
}
866894
} else if (_pos.nFile == m_last_blockfile && static_cast<uint32_t>(block.nHeight) > m_undo_height_in_last_blockfile) {
867895
m_undo_height_in_last_blockfile = block.nHeight;
868896
}

src/node/blockstorage.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -119,9 +119,14 @@ class BlockManager
119119
*/
120120
bool LoadBlockIndex()
121121
EXCLUSIVE_LOCKS_REQUIRED(cs_main);
122-
void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
123-
void FlushUndoFile(int block_file, bool finalize = false);
124-
bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
122+
123+
/** Return false if block file or undo file flushing fails. */
124+
[[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
125+
126+
/** Return false if undo file flushing fails. */
127+
[[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false);
128+
129+
[[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
125130
bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);
126131

127132
FlatFileSeq BlockFileSeq() const;

src/validation.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2594,7 +2594,11 @@ bool Chainstate::FlushStateToDisk(
25942594
LOG_TIME_MILLIS_WITH_CATEGORY("write block and undo data to disk", BCLog::BENCH);
25952595

25962596
// First make sure all block and undo data is flushed to disk.
2597-
m_blockman.FlushBlockFile();
2597+
// TODO: Handle return error, or add detailed comment why it is
2598+
// safe to not return an error upon failure.
2599+
if (!m_blockman.FlushBlockFile()) {
2600+
LogPrintLevel(BCLog::VALIDATION, BCLog::Level::Warning, "%s: Failed to flush block file.\n", __func__);
2601+
}
25982602
}
25992603

26002604
// Then update all block file information (which may refer to block and undo files).

0 commit comments

Comments
 (0)