Skip to content

Commit ab765c2

Browse files
committed
Merge #16577: util: CBufferedFile fixes and unit test
efd2474 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2 parents 003f2d2 + efd2474 commit ab765c2

File tree

2 files changed

+257
-11
lines changed

2 files changed

+257
-11
lines changed

src/streams.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -735,16 +735,17 @@ class CBufferedFile
735735
size_t nBytes = fread((void*)&vchBuf[pos], 1, readNow, src);
736736
if (nBytes == 0) {
737737
throw std::ios_base::failure(feof(src) ? "CBufferedFile::Fill: end of file" : "CBufferedFile::Fill: fread failed");
738-
} else {
739-
nSrcPos += nBytes;
740-
return true;
741738
}
739+
nSrcPos += nBytes;
740+
return true;
742741
}
743742

744743
public:
745744
CBufferedFile(FILE *fileIn, uint64_t nBufSize, uint64_t nRewindIn, int nTypeIn, int nVersionIn) :
746745
nType(nTypeIn), nVersion(nVersionIn), nSrcPos(0), nReadPos(0), nReadLimit(std::numeric_limits<uint64_t>::max()), nRewind(nRewindIn), vchBuf(nBufSize, 0)
747746
{
747+
if (nRewindIn >= nBufSize)
748+
throw std::ios_base::failure("Rewind limit must be less than buffer size");
748749
src = fileIn;
749750
}
750751

@@ -777,8 +778,6 @@ class CBufferedFile
777778
void read(char *pch, size_t nSize) {
778779
if (nSize + nReadPos > nReadLimit)
779780
throw std::ios_base::failure("Read attempted past buffer limit");
780-
if (nSize + nRewind > vchBuf.size())
781-
throw std::ios_base::failure("Read larger than buffer size");
782781
while (nSize > 0) {
783782
if (nReadPos == nSrcPos)
784783
Fill();
@@ -802,16 +801,19 @@ class CBufferedFile
802801

803802
//! rewind to a given reading position
804803
bool SetPos(uint64_t nPos) {
805-
nReadPos = nPos;
806-
if (nReadPos + nRewind < nSrcPos) {
807-
nReadPos = nSrcPos - nRewind;
804+
size_t bufsize = vchBuf.size();
805+
if (nPos + bufsize < nSrcPos) {
806+
// rewinding too far, rewind as far as possible
807+
nReadPos = nSrcPos - bufsize;
808808
return false;
809-
} else if (nReadPos > nSrcPos) {
809+
}
810+
if (nPos > nSrcPos) {
811+
// can't go this far forward, go as far as possible
810812
nReadPos = nSrcPos;
811813
return false;
812-
} else {
813-
return true;
814814
}
815+
nReadPos = nPos;
816+
return true;
815817
}
816818

817819
bool Seek(uint64_t nPos) {

src/test/streams_tests.cpp

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Distributed under the MIT software license, see the accompanying
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

5+
#include <random.h>
56
#include <streams.h>
67
#include <test/setup_common.h>
78

@@ -202,4 +203,247 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor)
202203
std::string(ds.begin(), ds.end()));
203204
}
204205

206+
BOOST_AUTO_TEST_CASE(streams_buffered_file)
207+
{
208+
FILE* file = fsbridge::fopen("streams_test_tmp", "w+b");
209+
// The value at each offset is the offset.
210+
for (uint8_t j = 0; j < 40; ++j) {
211+
fwrite(&j, 1, 1, file);
212+
}
213+
rewind(file);
214+
215+
// The buffer size (second arg) must be greater than the rewind
216+
// amount (third arg).
217+
try {
218+
CBufferedFile bfbad(file, 25, 25, 222, 333);
219+
BOOST_CHECK(false);
220+
} catch (const std::exception& e) {
221+
BOOST_CHECK(strstr(e.what(),
222+
"Rewind limit must be less than buffer size") != nullptr);
223+
}
224+
225+
// The buffer is 25 bytes, allow rewinding 10 bytes.
226+
CBufferedFile bf(file, 25, 10, 222, 333);
227+
BOOST_CHECK(!bf.eof());
228+
229+
// These two members have no functional effect.
230+
BOOST_CHECK_EQUAL(bf.GetType(), 222);
231+
BOOST_CHECK_EQUAL(bf.GetVersion(), 333);
232+
233+
uint8_t i;
234+
bf >> i;
235+
BOOST_CHECK_EQUAL(i, 0);
236+
bf >> i;
237+
BOOST_CHECK_EQUAL(i, 1);
238+
239+
// After reading bytes 0 and 1, we're positioned at 2.
240+
BOOST_CHECK_EQUAL(bf.GetPos(), 2);
241+
242+
// Rewind to offset 0, ok (within the 10 byte window).
243+
BOOST_CHECK(bf.SetPos(0));
244+
bf >> i;
245+
BOOST_CHECK_EQUAL(i, 0);
246+
247+
// We can go forward to where we've been, but beyond may fail.
248+
BOOST_CHECK(bf.SetPos(2));
249+
bf >> i;
250+
BOOST_CHECK_EQUAL(i, 2);
251+
252+
// If you know the maximum number of bytes that should be
253+
// read to deserialize the variable, you can limit the read
254+
// extent. The current file offset is 3, so the following
255+
// SetLimit() allows zero bytes to be read.
256+
BOOST_CHECK(bf.SetLimit(3));
257+
try {
258+
bf >> i;
259+
BOOST_CHECK(false);
260+
} catch (const std::exception& e) {
261+
BOOST_CHECK(strstr(e.what(),
262+
"Read attempted past buffer limit") != nullptr);
263+
}
264+
// The default argument removes the limit completely.
265+
BOOST_CHECK(bf.SetLimit());
266+
// The read position should still be at 3 (no change).
267+
BOOST_CHECK_EQUAL(bf.GetPos(), 3);
268+
269+
// Read from current offset, 3, forward until position 10.
270+
for (uint8_t j = 3; j < 10; ++j) {
271+
bf >> i;
272+
BOOST_CHECK_EQUAL(i, j);
273+
}
274+
BOOST_CHECK_EQUAL(bf.GetPos(), 10);
275+
276+
// We're guaranteed (just barely) to be able to rewind to zero.
277+
BOOST_CHECK(bf.SetPos(0));
278+
BOOST_CHECK_EQUAL(bf.GetPos(), 0);
279+
bf >> i;
280+
BOOST_CHECK_EQUAL(i, 0);
281+
282+
// We can set the position forward again up to the farthest
283+
// into the stream we've been, but no farther. (Attempting
284+
// to go farther may succeed, but it's not guaranteed.)
285+
BOOST_CHECK(bf.SetPos(10));
286+
bf >> i;
287+
BOOST_CHECK_EQUAL(i, 10);
288+
BOOST_CHECK_EQUAL(bf.GetPos(), 11);
289+
290+
// Now it's only guaranteed that we can rewind to offset 1
291+
// (current read position, 11, minus rewind amount, 10).
292+
BOOST_CHECK(bf.SetPos(1));
293+
BOOST_CHECK_EQUAL(bf.GetPos(), 1);
294+
bf >> i;
295+
BOOST_CHECK_EQUAL(i, 1);
296+
297+
// We can stream into large variables, even larger than
298+
// the buffer size.
299+
BOOST_CHECK(bf.SetPos(11));
300+
{
301+
uint8_t a[40 - 11];
302+
bf >> a;
303+
for (uint8_t j = 0; j < sizeof(a); ++j) {
304+
BOOST_CHECK_EQUAL(a[j], 11 + j);
305+
}
306+
}
307+
BOOST_CHECK_EQUAL(bf.GetPos(), 40);
308+
309+
// We've read the entire file, the next read should throw.
310+
try {
311+
bf >> i;
312+
BOOST_CHECK(false);
313+
} catch (const std::exception& e) {
314+
BOOST_CHECK(strstr(e.what(),
315+
"CBufferedFile::Fill: end of file") != nullptr);
316+
}
317+
// Attempting to read beyond the end sets the EOF indicator.
318+
BOOST_CHECK(bf.eof());
319+
320+
// Still at offset 40, we can go back 10, to 30.
321+
BOOST_CHECK_EQUAL(bf.GetPos(), 40);
322+
BOOST_CHECK(bf.SetPos(30));
323+
bf >> i;
324+
BOOST_CHECK_EQUAL(i, 30);
325+
BOOST_CHECK_EQUAL(bf.GetPos(), 31);
326+
327+
// We're too far to rewind to position zero.
328+
BOOST_CHECK(!bf.SetPos(0));
329+
// But we should now be positioned at least as far back as allowed
330+
// by the rewind window (relative to our farthest read position, 40).
331+
BOOST_CHECK(bf.GetPos() <= 30);
332+
333+
// We can explicitly close the file, or the destructor will do it.
334+
bf.fclose();
335+
336+
fs::remove("streams_test_tmp");
337+
}
338+
339+
BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
340+
{
341+
// Make this test deterministic.
342+
SeedInsecureRand(true);
343+
344+
for (int rep = 0; rep < 50; ++rep) {
345+
FILE* file = fsbridge::fopen("streams_test_tmp", "w+b");
346+
size_t fileSize = InsecureRandRange(256);
347+
for (uint8_t i = 0; i < fileSize; ++i) {
348+
fwrite(&i, 1, 1, file);
349+
}
350+
rewind(file);
351+
352+
size_t bufSize = InsecureRandRange(300) + 1;
353+
size_t rewindSize = InsecureRandRange(bufSize);
354+
CBufferedFile bf(file, bufSize, rewindSize, 222, 333);
355+
size_t currentPos = 0;
356+
size_t maxPos = 0;
357+
for (int step = 0; step < 100; ++step) {
358+
if (currentPos >= fileSize)
359+
break;
360+
361+
// We haven't read to the end of the file yet.
362+
BOOST_CHECK(!bf.eof());
363+
BOOST_CHECK_EQUAL(bf.GetPos(), currentPos);
364+
365+
// Pretend the file consists of a series of objects of varying
366+
// sizes; the boundaries of the objects can interact arbitrarily
367+
// with the CBufferFile's internal buffer. These first three
368+
// cases simulate objects of various sizes (1, 2, 5 bytes).
369+
switch (InsecureRandRange(5)) {
370+
case 0: {
371+
uint8_t a[1];
372+
if (currentPos + 1 > fileSize)
373+
continue;
374+
bf.SetLimit(currentPos + 1);
375+
bf >> a;
376+
for (uint8_t i = 0; i < 1; ++i) {
377+
BOOST_CHECK_EQUAL(a[i], currentPos);
378+
currentPos++;
379+
}
380+
break;
381+
}
382+
case 1: {
383+
uint8_t a[2];
384+
if (currentPos + 2 > fileSize)
385+
continue;
386+
bf.SetLimit(currentPos + 2);
387+
bf >> a;
388+
for (uint8_t i = 0; i < 2; ++i) {
389+
BOOST_CHECK_EQUAL(a[i], currentPos);
390+
currentPos++;
391+
}
392+
break;
393+
}
394+
case 2: {
395+
uint8_t a[5];
396+
if (currentPos + 5 > fileSize)
397+
continue;
398+
bf.SetLimit(currentPos + 5);
399+
bf >> a;
400+
for (uint8_t i = 0; i < 5; ++i) {
401+
BOOST_CHECK_EQUAL(a[i], currentPos);
402+
currentPos++;
403+
}
404+
break;
405+
}
406+
case 3: {
407+
// Find a byte value (that is at or ahead of the current position).
408+
size_t find = currentPos + InsecureRandRange(8);
409+
if (find >= fileSize)
410+
find = fileSize - 1;
411+
bf.FindByte(static_cast<char>(find));
412+
// The value at each offset is the offset.
413+
BOOST_CHECK_EQUAL(bf.GetPos(), find);
414+
currentPos = find;
415+
416+
bf.SetLimit(currentPos + 1);
417+
uint8_t i;
418+
bf >> i;
419+
BOOST_CHECK_EQUAL(i, currentPos);
420+
currentPos++;
421+
break;
422+
}
423+
case 4: {
424+
size_t requestPos = InsecureRandRange(maxPos + 4);
425+
bool okay = bf.SetPos(requestPos);
426+
// The new position may differ from the requested position
427+
// because we may not be able to rewind beyond the rewind
428+
// window, and we may not be able to move forward beyond the
429+
// farthest position we've reached so far.
430+
currentPos = bf.GetPos();
431+
BOOST_CHECK_EQUAL(okay, currentPos == requestPos);
432+
// Check that we can position within the rewind window.
433+
if (requestPos <= maxPos &&
434+
maxPos > rewindSize &&
435+
requestPos >= maxPos - rewindSize) {
436+
// We requested a position within the rewind window.
437+
BOOST_CHECK(okay);
438+
}
439+
break;
440+
}
441+
}
442+
if (maxPos < currentPos)
443+
maxPos = currentPos;
444+
}
445+
}
446+
fs::remove("streams_test_tmp");
447+
}
448+
205449
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)