Skip to content

Commit 321fd87

Browse files
lauris71Lauris Kaplinskimetsma
authored
Handle zero-sized files (#78)
* Fixed zero-sized file handling Enforce encryption workflow for CDoc2 Added test for large files and long Unicode filenames * Check that filenames are valid UTF-8 strings Signed-off-by: Lauris Kaplinski <[email protected]> --------- Signed-off-by: Lauris Kaplinski <[email protected]> Co-authored-by: Lauris Kaplinski <[email protected]> Co-authored-by: Raul Metsma <[email protected]>
1 parent a87060d commit 321fd87

File tree

6 files changed

+245
-24
lines changed

6 files changed

+245
-24
lines changed

cdoc/CDoc1Writer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ CDoc1Writer::addFile(const std::string& name, size_t size)
292292
{
293293
if(!d)
294294
return WORKFLOW_ERROR;
295+
if (name.empty() || !libcdoc::isValidUtf8(name)) return libcdoc::DATA_FORMAT_ERROR;
295296
d->files.push_back({name, size, {}});
296297
return libcdoc::OK;
297298
}

cdoc/CDoc2Writer.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -470,20 +470,30 @@ libcdoc::result_t
470470
CDoc2Writer::beginEncryption()
471471
{
472472
last_error.clear();
473-
if(!recipients.empty()) {
474-
LOG_ERROR("Encryption workflow already started");
475-
setLastError("Encryption workflow already started");
473+
if(recipients.empty()) {
474+
setLastError("No recipients added");
475+
LOG_ERROR("{}", last_error);
476+
return libcdoc::WORKFLOW_ERROR;
477+
}
478+
if(tar) {
479+
setLastError("Encryption already started");
480+
LOG_ERROR("{}", last_error);
481+
return libcdoc::WORKFLOW_ERROR;
476482
}
483+
if(auto rv = writeHeader(recipients); rv < 0)
484+
return rv;
477485
return libcdoc::OK;
478486
}
479487

480488
libcdoc::result_t
481489
CDoc2Writer::addFile(const std::string& name, size_t size)
482490
{
483491
if(!tar) {
484-
if(auto rv = writeHeader(recipients); rv < 0)
485-
return rv;
492+
setLastError("Encryption not started");
493+
LOG_ERROR("{}", last_error);
494+
return libcdoc::WORKFLOW_ERROR;
486495
}
496+
if (name.empty() || !libcdoc::isValidUtf8(name)) return libcdoc::DATA_FORMAT_ERROR;
487497
if(auto rv = tar->open(name, size); rv < 0) {
488498
setLastError(tar->getLastErrorStr(rv));
489499
LOG_ERROR("{}", last_error);

cdoc/Tar.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
using namespace libcdoc;
2626

27+
constexpr unsigned int BLOCKSIZE = 512;
28+
2729
template<std::size_t SIZE>
2830
static int64_t fromOctal(const std::array<char,SIZE> &data)
2931
{
@@ -72,7 +74,7 @@ struct libcdoc::Header {
7274
{
7375
int64_t unsignedSum = 0;
7476
int64_t signedSum = 0;
75-
for (size_t i = 0, size = sizeof(Header); i < size; i++) {
77+
for (size_t i = 0, size = BLOCKSIZE; i < size; i++) {
7678
unsignedSum += ((unsigned char*) this)[i];
7779
signedSum += ((signed char*) this)[i];
7880
}
@@ -97,9 +99,11 @@ struct libcdoc::Header {
9799
bool operator==(const Header&) const = default;
98100
};
99101

102+
static_assert (sizeof(Header) == BLOCKSIZE, "Header struct size is incorrect");
103+
100104
static int padding(int64_t size)
101105
{
102-
return sizeof(Header) - size % sizeof(Header);
106+
return BLOCKSIZE * ((size + BLOCKSIZE - 1) / BLOCKSIZE) - size;
103107
}
104108

105109
std::string toPaxRecord (const std::string &keyword, const std::string &value) {
@@ -131,7 +135,7 @@ libcdoc::TarConsumer::write(const uint8_t *src, size_t size)
131135

132136
/**
 * Write one header block to the destination consumer.
 *
 * @param h header to emit; exactly BLOCKSIZE bytes are taken from it.
 * @return OK when the destination accepted exactly BLOCKSIZE bytes; otherwise
 *         the destination's own result if it is below OK, or OUTPUT_ERROR for
 *         a short write.
 */
libcdoc::result_t
libcdoc::TarConsumer::writeHeader(const Header &h) {
    const auto written = _dst->write((const uint8_t *)&h, BLOCKSIZE);
    if (written == BLOCKSIZE)
        return OK;
    return written < OK ? written : OUTPUT_ERROR;
}
@@ -146,7 +150,7 @@ libcdoc::TarConsumer::writeHeader(Header &h, int64_t size) {
146150

147151
libcdoc::result_t
148152
libcdoc::TarConsumer::writePadding(int64_t size) {
149-
std::array<uint8_t,sizeof(libcdoc::Header)> pad {};
153+
static const std::array<uint8_t,BLOCKSIZE> pad {};
150154
auto padSize = padding(size);
151155
if(auto rv = _dst->write(pad.data(), padSize); rv != padSize)
152156
return rv < OK ? rv : OUTPUT_ERROR;
@@ -269,14 +273,14 @@ libcdoc::TarSource::next(std::string& name, int64_t& size)
269273
}
270274
}
271275
while (!_src->isEof()) {
272-
int64_t result = _src->read((uint8_t *)&h, sizeof(Header));
273-
if (result != sizeof(Header)) {
276+
int64_t result = _src->read((uint8_t *)&h, BLOCKSIZE);
277+
if (result != BLOCKSIZE) {
274278
_error = INPUT_STREAM_ERROR;
275279
return _error;
276280
}
277281
if (h.isNull()) {
278-
result = _src->read((uint8_t *)&h, sizeof(Header));
279-
if (result != sizeof(Header)) {
282+
result = _src->read((uint8_t *)&h, BLOCKSIZE);
283+
if (result != BLOCKSIZE) {
280284
_error = INPUT_STREAM_ERROR;
281285
return _error;
282286
}
@@ -299,8 +303,8 @@ libcdoc::TarSource::next(std::string& name, int64_t& size)
299303
}
300304
std::string paxData(pax_in.data(), pax_in.size());
301305
_src->skip(padding(h_size));
302-
result = _src->read((uint8_t *)&h, sizeof(Header));
303-
if (result != sizeof(Header)) {
306+
result = _src->read((uint8_t *)&h, BLOCKSIZE);
307+
if (result != BLOCKSIZE) {
304308
_error = INPUT_STREAM_ERROR;
305309
return _error;
306310
}

cdoc/Utils.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,28 @@ timeToISO(double time)
8282
#endif
8383
}
8484

85+
/**
 * Check whether a byte string is well-formed UTF-8.
 *
 * Besides the lead/continuation bit patterns, this rejects the sequences
 * that are structurally plausible but still ill-formed:
 *  - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0,
 *    0xF0 followed by < 0x90),
 *  - UTF-16 surrogates U+D800..U+DFFF (0xED followed by > 0x9F),
 *  - code points above U+10FFFF (0xF4 followed by > 0x8F, lead bytes > 0xF4).
 *
 * @param str bytes to validate; an empty string is considered valid.
 * @return true if every byte belongs to a well-formed sequence.
 */
bool
isValidUtf8 (std::string str)
{
    const uint8_t *s = (const uint8_t *) str.data();
    const uint8_t *e = s + str.size();
    // Continuation bytes have the bit pattern 10xxxxxx
    const auto isCont = [](uint8_t c) { return (c & 0xc0) == 0x80; };
    while (s < e) {
        const size_t s_len = e - s;
        const uint8_t lead = s[0];
        if (lead < 0x80) {
            // Plain ASCII
            s += 1;
        } else if (lead >= 0xc2 && lead <= 0xdf) {
            // 0xC0 and 0xC1 could only encode overlong ASCII, so start at 0xC2
            if (s_len < 2 || !isCont(s[1])) return false;
            s += 2;
        } else if ((lead & 0xf0) == 0xe0) {
            if (s_len < 3 || !isCont(s[1]) || !isCont(s[2])) return false;
            // Overlong form of a code point below U+0800
            if (lead == 0xe0 && s[1] < 0xa0) return false;
            // UTF-16 surrogate range U+D800..U+DFFF
            if (lead == 0xed && s[1] > 0x9f) return false;
            s += 3;
        } else if (lead >= 0xf0 && lead <= 0xf4) {
            if (s_len < 4 || !isCont(s[1]) || !isCont(s[2]) || !isCont(s[3])) return false;
            // Overlong form of a code point below U+10000
            if (lead == 0xf0 && s[1] < 0x90) return false;
            // Beyond the last Unicode code point U+10FFFF
            if (lead == 0xf4 && s[1] > 0x8f) return false;
            s += 4;
        } else {
            // Stray continuation byte or an invalid lead byte (0xC0, 0xC1, 0xF5..0xFF)
            return false;
        }
    }
    return true;
}
106+
85107
int
86108
parseURL(const std::string& url, std::string& host, int& port, std::string& path, bool end_with_slash)
87109
{

cdoc/Utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ double getTime();
8686
double timeFromISO(std::string_view iso);
8787
std::string timeToISO(double time);
8888

89+
bool isValidUtf8 (std::string str);
90+
8991
static std::vector<uint8_t>
9092
readAllBytes(std::istream& ifs)
9193
{

test/libcdoc_boost.cpp

Lines changed: 191 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
#define BOOST_TEST_MODULE "C++ Unit Tests for libcdoc"
2020

2121
#include <boost/test/unit_test.hpp>
22+
#include <codecvt>
2223
#include <filesystem>
2324
#include <fstream>
2425
#include <map>
2526
#include <CDocCipher.h>
27+
#include <CryptoBackend.h>
2628
#include <Recipient.h>
2729
#include <Utils.h>
2830
#include <cdoc/Crypto.h>
@@ -75,14 +77,24 @@ class FixtureBase
7577
public:
7678
FixtureBase()
7779
{
78-
// Get path to test data, provided via argument to the unit tests application
79-
if (utf::framework::master_test_suite().argc <= 1)
80-
{
81-
testDataPath = DATA_DIR;
82-
}
83-
else
84-
{
85-
testDataPath = utf::framework::master_test_suite().argv[1];
80+
int argc = utf::framework::master_test_suite().argc;
81+
for (int i = 0; i < argc; i++) {
82+
std::string_view arg = utf::framework::master_test_suite().argv[i];
83+
if (arg == "--data-path") {
84+
if (i >= argc) {
85+
std::cerr << "Missing data path value" << std::endl;
86+
::exit(1);
87+
}
88+
i += 1;
89+
testDataPath = utf::framework::master_test_suite().argv[i];
90+
} else if (arg == "--max-filesize") {
91+
if (i >= argc) {
92+
std::cerr << "Missing max filesize value" << std::endl;
93+
::exit(1);
94+
}
95+
i += 1;
96+
max_filesize = std::stoull(utf::framework::master_test_suite().argv[i]);
97+
}
8698
}
8799
}
88100

@@ -122,9 +134,10 @@ class FixtureBase
122134
}
123135
}
124136

125-
fs::path testDataPath;
137+
fs::path testDataPath = DATA_DIR;
126138
fs::path sourceFilePath;
127139
fs::path targetFilePath;
140+
size_t max_filesize = 100000000;
128141
};
129142

130143
/**
@@ -215,6 +228,175 @@ class DecryptFixture : public FixtureBase
215228
}
216229
};
217230

231+
struct PipeSource : public libcdoc::DataSource {
232+
PipeSource(std::vector<uint8_t>& data, bool& eof) : _data(data), _eof(eof) {}
233+
234+
libcdoc::result_t read(uint8_t *dst, size_t size) override {
235+
size = std::min<size_t>(size, _data.size());
236+
std::copy(_data.cbegin(), _data.cbegin() + size, dst);
237+
if (_buf.size() < 1024) {
238+
size_t newbufsize = _buf.size() + size;
239+
if (newbufsize > 1024) newbufsize = 1024;
240+
size_t tocopy = newbufsize - _buf.size();
241+
_buf.insert(_buf.end(), _data.begin(), _data.begin() + tocopy);
242+
}
243+
_data.erase(_data.cbegin(), _data.cbegin() + size);
244+
return size;
245+
}
246+
247+
libcdoc::result_t seek(size_t pos) override {
248+
if (pos <= _buf.size()) {
249+
_data.insert(_data.begin(), _buf.begin() + pos, _buf.end());
250+
_buf.erase(_buf.begin() + pos, _buf.end());
251+
return libcdoc::OK;
252+
}
253+
return libcdoc::NOT_IMPLEMENTED;
254+
}
255+
bool isError() override { return false; }
256+
bool isEof() override { return _eof; }
257+
protected:
258+
std::vector<uint8_t>& _data;
259+
bool& _eof;
260+
std::vector<uint8_t> _buf;
261+
};
262+
263+
struct PipeConsumer : public libcdoc::DataConsumer {
264+
PipeConsumer(std::vector<uint8_t>& data, bool& eof) : _data(data), _eof(eof) { _eof = false; }
265+
libcdoc::result_t write(const uint8_t *src, size_t size) override final {
266+
_data.insert(_data.end(), src, src + size);
267+
return size;
268+
}
269+
libcdoc::result_t close() override final { _eof = true; return libcdoc::OK; }
270+
virtual bool isError() override final { return false; }
271+
protected:
272+
std::vector<uint8_t>& _data;
273+
bool& _eof;
274+
};
275+
276+
struct PipeCrypto : public libcdoc::CryptoBackend {
277+
PipeCrypto(std::string pwd) : _secret(pwd.cbegin(), pwd.cend()) {}
278+
279+
libcdoc::result_t getSecret(std::vector<uint8_t>& dst, unsigned int idx) {
280+
dst = _secret;
281+
return libcdoc::OK;
282+
};
283+
284+
std::vector<uint8_t> _secret;
285+
};
286+
287+
struct PipeWriter {
288+
static constexpr size_t BUFSIZE = 1024 * 1024;
289+
290+
PipeWriter(libcdoc::CDocWriter *writer, const std::vector<libcdoc::FileInfo>& files) : _writer(writer), _files(files), current(-1), cpos(0) {}
291+
292+
uint8_t getChar(int filenum, size_t pos) {
293+
uint64_t x = pos + ((uint64_t) filenum << 40);
294+
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
295+
x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
296+
x = x ^ (x >> 31);
297+
return (uint8_t) (x & 0xff);
298+
}
299+
300+
libcdoc::result_t writeMore() {
301+
if (current >= (int) _files.size()) return libcdoc::WORKFLOW_ERROR;
302+
303+
if ((current < 0) || (cpos >= _files[current].size)) {
304+
// Start new file
305+
current += 1;
306+
cpos = 0;
307+
if (current >= (int) _files.size()) {
308+
return _writer->finishEncryption();
309+
}
310+
return _writer->addFile(_files[current].name, _files[current].size);
311+
}
312+
size_t towrite = _files[current].size - cpos;
313+
if (towrite > BUFSIZE) towrite = BUFSIZE;
314+
uint8_t buf[BUFSIZE];
315+
for (int i = 0; i < towrite; i++) buf[i] = getChar(current, cpos + i);
316+
cpos += towrite;
317+
return _writer->writeData(buf, towrite);
318+
}
319+
320+
bool isEof() {
321+
return current >= (int) _files.size();
322+
}
323+
324+
int current = 0;
325+
size_t cpos = 0;
326+
327+
libcdoc::CDocWriter *_writer;
328+
const std::vector<libcdoc::FileInfo>& _files;
329+
};
330+
331+
BOOST_AUTO_TEST_SUITE(LargeFiles)
332+
333+
// Round trip through an in-memory pipe: a CDoc2 container is written step by step
// while it is being read back, so large files never have to fit in memory at once.
// Covers long random Unicode file names, decreasing file sizes and zero-sized files.
BOOST_FIXTURE_TEST_CASE_WITH_DECOR(EncryptWithPasswordAndLabel, FixtureBase, * utf::description("Testing weird and large files"))
{
    std::vector<uint8_t> data;
    bool eof = false;
    PipeConsumer pipec(data, eof);
    PipeSource pipes(data, eof);
    PipeCrypto pcrypto("password");

    // Create writer
    // NOTE(review): writer and reader are never released in this test — confirm
    // whether createWriter/createReader hand ownership to the caller.
    libcdoc::CDocWriter *writer = libcdoc::CDocWriter::createWriter(2, &pipec, false, nullptr, &pcrypto, nullptr);
    // Fatal check: the pointer is dereferenced right below
    BOOST_REQUIRE(writer != nullptr);
    libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test", 65536);
    BOOST_TEST(writer->addRecipient(rcpt) == libcdoc::OK);
    BOOST_TEST(writer->beginEncryption() == libcdoc::OK);

    std::srand(1);
    std::vector<libcdoc::FileInfo> files;
    for (size_t fsize = max_filesize; fsize != 0; fsize = fsize / 1000) {
        // At least one character: an empty name would be rejected by addFile
        size_t len = std::rand() % 999 + 1;
        std::u16string u16(len, ' ');
        for (size_t c = 0; c < len; c++) u16[c] = std::rand() % 10000 + 32;
        std::string u8 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(u16);
        // Every name is used twice: once with content and once as a zero-sized file
        files.emplace_back(u8, fsize);
        files.emplace_back(u8, 0);
    }

    PipeWriter wrt(writer, files);

    // Create reader
    libcdoc::CDocReader *reader = libcdoc::CDocReader::createReader(&pipes, false, nullptr, &pcrypto, nullptr);
    BOOST_REQUIRE(reader != nullptr);

    // Keep roughly two buffers of encrypted data ahead of the reader
    auto fillPipe = [&]() {
        while((data.size() < 2 * wrt.BUFSIZE) && !wrt.isEof()) {
            BOOST_TEST(wrt.writeMore() == libcdoc::OK);
        }
    };

    fillPipe();
    std::vector<uint8_t> fmk;
    BOOST_TEST(reader->getFMK(fmk, 0) == libcdoc::OK);
    BOOST_TEST(reader->beginDecryption(fmk) == libcdoc::OK);
    libcdoc::FileInfo fi;
    // Heap-backed buffers: two 1 MiB automatic arrays can exhaust small thread stacks
    std::vector<uint8_t> buf(PipeWriter::BUFSIZE);
    std::vector<uint8_t> cbuf(PipeWriter::BUFSIZE);
    for (size_t cfile = 0; cfile < files.size(); cfile++) {
        fillPipe();
        // Get file
        BOOST_TEST(reader->nextFile(fi) == libcdoc::OK);
        BOOST_TEST(fi.name == files[cfile].name);
        BOOST_TEST(fi.size == files[cfile].size);
        for (size_t pos = 0; pos < files[cfile].size; pos += wrt.BUFSIZE) {
            fillPipe();
            size_t toread = files[cfile].size - pos;
            if (toread > wrt.BUFSIZE) toread = wrt.BUFSIZE;
            BOOST_TEST(reader->readData(buf.data(), toread) == toread);
            // Recompute the expected bytes instead of storing the plaintext
            for (size_t i = 0; i < toread; i++) cbuf[i] = wrt.getChar((int) cfile, pos + i);
            BOOST_TEST(std::memcmp(buf.data(), cbuf.data(), toread) == 0);
        }
    }
    BOOST_TEST(reader->nextFile(fi) == libcdoc::END_OF_STREAM);
    BOOST_TEST(reader->finishDecryption() == libcdoc::OK);
}
398+
399+
BOOST_AUTO_TEST_SUITE_END()
218400

219401
BOOST_AUTO_TEST_SUITE(PasswordUsageWithLabel)
220402

0 commit comments

Comments
 (0)