Skip to content

Commit efc4e2a

Browse files
authored
Merge pull request #1047 from openzim/compression_infinite_loop
Eliminated the possibility for zim::Compressor being trapped in an infinite loop
2 parents f8cc2cb + dae246c commit efc4e2a

File tree

2 files changed

+42
-20
lines changed

2 files changed

+42
-20
lines changed

src/compression.h

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,12 @@ template<typename INFO>
227227
class Compressor
228228
{
229229
public:
230-
Compressor(size_t initial_size=1024*1024) :
230+
explicit Compressor(size_t initial_size=1024*1024) :
231231
ret_data(new char[initial_size]),
232232
ret_size(initial_size)
233-
{}
233+
{
234+
ASSERT(initial_size, >, 0u);
235+
}
234236

235237
~Compressor() = default;
236238

@@ -244,32 +246,25 @@ class Compressor
244246
stream.next_in = (unsigned char*)data;
245247
stream.avail_in = size;
246248
while (true) {
249+
if (stream.avail_out == 0) {
250+
ret_size *= 2;
251+
std::unique_ptr<char[]> new_ret_data(new char[ret_size]);
252+
memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
253+
stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
254+
stream.avail_out = ret_size - stream.total_out;
255+
ret_data = std::move(new_ret_data);
256+
}
247257
auto errcode = INFO::stream_run_encode(&stream, step);
248258
switch (errcode) {
249259
case CompStatus::OK:
250-
if (stream.avail_out == 0) {
251-
// lzma return a OK return status the first time it runs out of output memory.
252-
// The BUF_ERROR is returned only the second time we call a lzma_code.
253-
continue;
254-
} else {
255-
return RunnerStatus::NEED_MORE;
256-
}
260+
return RunnerStatus::NEED_MORE;
257261
case CompStatus::STREAM_END:
258262
return RunnerStatus::NEED_MORE;
259263
case CompStatus::BUF_ERROR:
260-
if (stream.avail_out == 0) {
261-
//Not enought output size
262-
ret_size *= 2;
263-
std::unique_ptr<char[]> new_ret_data(new char[ret_size]);
264-
memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
265-
stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
266-
stream.avail_out = ret_size - stream.total_out;
267-
ret_data = std::move(new_ret_data);
268-
continue;
269-
} else {
264+
if (stream.avail_out != 0) {
270265
return RunnerStatus::ERROR;
271266
}
272-
break;
267+
break;
273268
default:
274269
// unreachable
275270
return RunnerStatus::ERROR;

test/compression.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,5 +99,32 @@ TYPED_TEST(CompressionTest, compress) {
9999
}
100100
}
101101

102+
TEST(CompressionTest, zstdCompressionOutputExactlyFillsTheBuffer) {
103+
const size_t DATA_SIZE = 10000000;
104+
std::vector<char> data(DATA_SIZE, '\0');
105+
for (size_t i = 0; i < DATA_SIZE; ++i ) {
106+
data[i] = 1 + (char)(i%255);
107+
}
108+
std::vector<unsigned char> outputDataBuffer(DATA_SIZE, '\0');
109+
110+
ZSTD_INFO::stream_t zstdStream;
111+
ZSTD_INFO::init_stream_encoder(&zstdStream, &data[0]);
112+
zstdStream.next_in = (const unsigned char*)&data[0];
113+
zstdStream.avail_in = DATA_SIZE;
114+
zstdStream.next_out = &outputDataBuffer[0];
115+
zstdStream.avail_out = DATA_SIZE;
116+
ZSTD_INFO::stream_run_encode(&zstdStream, CompStep::STEP);
117+
const size_t sizeOfIntermediateOutput = DATA_SIZE - zstdStream.avail_out;
118+
ASSERT_GT(sizeOfIntermediateOutput, 0);
119+
ASSERT_LT(sizeOfIntermediateOutput, DATA_SIZE);
120+
121+
// Setup the initial size of the output data buffer to exactly match the size
122+
// of output data from feeding in the first chunk of input data
123+
zim::Compressor<ZSTD_INFO> compressor(sizeOfIntermediateOutput);
124+
compressor.init(&data[0]);
125+
compressor.feed(&data[0], DATA_SIZE);
126+
zim::zsize_t compressedDataSize2;
127+
const auto out2 = compressor.get_data(&compressedDataSize2);
128+
}
102129

103130
} // namespace

0 commit comments

Comments
 (0)