Skip to content

Commit c1b9fbf

Browse files
Handle DAX as an output format.
Note that DAX sector data includes zlib headers, so we compress in ZLIB format instead of raw DEFLATE format. In effect, it's just a more wasteful CSO. NC areas are not supported for now, because we can't write out the first compressed block without knowing how many NC areas there will be.
1 parent a0cd695 commit c1b9fbf

File tree

6 files changed

+166
-51
lines changed

6 files changed

+166
-51
lines changed

src/compress.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "compress.h"
44
#include "uv_helper.h"
55
#include "cso.h"
6+
#include "dax.h"
67
#include "input.h"
78
#include "output.h"
89
#include "buffer_pool.h"
@@ -62,9 +63,13 @@ class CompressionTask {
6263

6364
void CompressionTask::Enqueue() {
6465
if (task_.block_size == DEFAULT_BLOCK_SIZE) {
65-
// Start with a small block size.
66-
// We'll re-evaluate later.
67-
blockSize_ = SMALL_BLOCK_SIZE;
66+
if (task_.flags & TASKFLAG_FMT_DAX) {
67+
blockSize_ = DAX_FRAME_SIZE;
68+
} else {
69+
// Start with a small block size.
70+
// We'll re-evaluate later.
71+
blockSize_ = SMALL_BLOCK_SIZE;
72+
}
6873
} else {
6974
if (task_.block_size > MAX_BLOCK_SIZE) {
7075
Notify(TASK_INVALID_OPTION, "Block size too large");
@@ -148,6 +153,8 @@ void CompressionTask::BeginProcessing() {
148153
fmt = CSO_FMT_CSO2;
149154
} else if (task_.flags & TASKFLAG_FMT_ZSO) {
150155
fmt = CSO_FMT_ZSO;
156+
} else if (task_.flags & TASKFLAG_FMT_DAX) {
157+
fmt = CSO_FMT_DAX;
151158
}
152159

153160
// Now that we know the file size, check if we should resize the blockSize_.

src/compress.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ enum TaskFlags {
4747
TASKFLAG_NO_ALL = TASKFLAG_NO_ZLIB | TASKFLAG_NO_ZOPFLI | TASKFLAG_NO_7ZIP | TASKFLAG_NO_LZ4,
4848

4949
TASKFLAG_DECOMPRESS = 0x400,
50+
TASKFLAG_FMT_DAX = 0x800,
5051
};
5152

5253
typedef std::function<void (const Task *, TaskStatus status, int64_t pos, int64_t total, int64_t written)> ProgressCallback;

src/cso.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ enum CSOFormat {
1717
CSO_FMT_CSO1,
1818
CSO_FMT_CSO2,
1919
CSO_FMT_ZSO,
20+
CSO_FMT_DAX,
2021
};
2122

2223
#ifdef _MSC_VER

src/output.cpp

Lines changed: 123 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "buffer_pool.h"
44
#include "compress.h"
55
#include "cso.h"
6+
#include "dax.h"
67

78
namespace maxcso {
89

@@ -50,14 +51,7 @@ void Output::SetFile(uv_file file, int64_t srcSize, uint32_t blockSize, CSOForma
5051
const uint32_t sectors = static_cast<uint32_t>((srcSize + blockSize_ - 1) >> blockShift_);
5152
// Start after the header and index, which we'll fill in later.
5253
index_ = new uint32_t[sectors + 1];
53-
if (flags_ & TASKFLAG_DECOMPRESS) {
54-
// Decompressing, so no header.
55-
// We still track the index for code simplicity, but throw it away.
56-
dstPos_ = 0;
57-
} else {
58-
// Start after the end of the index data and header.
59-
dstPos_ = sizeof(CSOHeader) + (sectors + 1) * sizeof(uint32_t);
60-
}
54+
dstPos_ = DstFirstSectorPos(sectors);
6155

6256
// TODO: We might be able to optimize shift better by running through the data.
6357
// That would require either a second pass or keeping the entire result in RAM.
@@ -74,6 +68,17 @@ void Output::SetFile(uv_file file, int64_t srcSize, uint32_t blockSize, CSOForma
7468
}
7569
}
7670

71+
if (fmt == CSO_FMT_DAX) {
72+
if (indexShift_ != 0 || static_cast<uint32_t>(srcSize_) < srcSize_) {
73+
finish_(false, "File too large to compress as DAX");
74+
return;
75+
}
76+
if (blockSize_ != DAX_FRAME_SIZE) {
77+
finish_(false, "DAX requires a block size of 8192");
78+
return;
79+
}
80+
}
81+
7782
// If the shift is above 11, the padding could make it need more space.
7883
// But that would be > 4 TB anyway, so let's not worry about it.
7984
indexAlign_ = 1 << indexShift_;
@@ -88,6 +93,21 @@ void Output::SetFile(uv_file file, int64_t srcSize, uint32_t blockSize, CSOForma
8893
}
8994
}
9095

96+
int64_t Output::DstFirstSectorPos(uint32_t totalSectors) {
97+
if (flags_ & TASKFLAG_DECOMPRESS) {
98+
// Decompressing, so no header.
99+
// We still track the index for code simplicity, but throw it away.
100+
return 0;
101+
} else if (flags_ & TASKFLAG_FMT_DAX) {
102+
// Pos (32 bits) and size (16 bits) per sector, plus header.
103+
// TODO: We don't support NC areas, but if we did, we'd have to know them here already...
104+
return sizeof(DAXHeader) + totalSectors * (sizeof(uint32_t) + sizeof(uint16_t));
105+
} else {
106+
// Start after the end of the index data and header.
107+
return sizeof(CSOHeader) + (totalSectors + 1) * sizeof(uint32_t);
108+
}
109+
}
110+
91111
int32_t Output::Align(int64_t &pos) {
92112
uint32_t off = static_cast<uint32_t>(pos % indexAlign_);
93113
if (off != 0) {
@@ -200,35 +220,11 @@ void Output::HandleReadySector(Sector *sector) {
200220
static char padding[2048] = {0};
201221
for (size_t i = 0; i < sectors.size(); ++i) {
202222
unsigned int bestSize = sectors[i]->BestSize();
203-
bufs[nbufs++] = uv_buf_init(reinterpret_cast<char *>(sectors[i]->BestBuffer()), bestSize);
204-
205-
// Update the index.
206-
const int32_t s = static_cast<int32_t>(sectors[i]->Pos() >> blockShift_);
207-
index_[s] = static_cast<int32_t>(dstPos >> indexShift_);
208-
// CSO2 doesn't use a flag for uncompressed, only the size of the block.
209-
if (!sectors[i]->Compressed() && fmt_ != CSO_FMT_CSO2) {
210-
index_[s] |= CSO_INDEX_UNCOMPRESSED;
211-
}
212-
switch (fmt_) {
213-
case CSO_FMT_CSO1:
214-
if (sectors[i]->Format() == SECTOR_FMT_LZ4) {
215-
finish_(false, "LZ4 format not supported within CSO v1 file");
216-
return;
217-
}
218-
break;
219-
case CSO_FMT_ZSO:
220-
if (sectors[i]->Format() == SECTOR_FMT_DEFLATE) {
221-
finish_(false, "Deflate format not supported within ZSO file");
222-
return;
223-
}
224-
break;
225-
case CSO_FMT_CSO2:
226-
if (sectors[i]->Format() == SECTOR_FMT_LZ4) {
227-
index_[s] |= CSO2_INDEX_LZ4;
228-
}
229-
break;
223+
if (!UpdateIndex(sectors[i]->Pos(), dstPos, bestSize, sectors[i]->Format())) {
224+
return;
230225
}
231226

227+
bufs[nbufs++] = uv_buf_init(reinterpret_cast<char *>(sectors[i]->BestBuffer()), bestSize);
232228
dstPos += bestSize;
233229
int32_t padSize = Align(dstPos);
234230
if (padSize != 0) {
@@ -276,6 +272,42 @@ void Output::HandleReadySector(Sector *sector) {
276272
});
277273
}
278274

275+
bool Output::UpdateIndex(int64_t srcPos, int64_t dstPos, uint32_t compressedSize, SectorFormat compressedFmt) {
276+
const int32_t s = static_cast<int32_t>(srcPos >> blockShift_);
277+
index_[s] = static_cast<int32_t>(dstPos >> indexShift_);
278+
// CSO2 doesn't use a flag for uncompressed, only the size of the block.
279+
if (compressedFmt == SECTOR_FMT_ORIG && fmt_ != CSO_FMT_CSO2 && fmt_ != CSO_FMT_DAX) {
280+
index_[s] |= CSO_INDEX_UNCOMPRESSED;
281+
}
282+
switch (fmt_) {
283+
case CSO_FMT_CSO1:
284+
if (compressedFmt == SECTOR_FMT_LZ4) {
285+
finish_(false, "LZ4 format not supported within CSO v1 file");
286+
return false;
287+
}
288+
break;
289+
case CSO_FMT_ZSO:
290+
if (compressedFmt == SECTOR_FMT_DEFLATE) {
291+
finish_(false, "Deflate format not supported within ZSO file");
292+
return false;
293+
}
294+
break;
295+
case CSO_FMT_CSO2:
296+
if (compressedFmt == SECTOR_FMT_LZ4) {
297+
index_[s] |= CSO2_INDEX_LZ4;
298+
}
299+
break;
300+
case CSO_FMT_DAX:
301+
if (compressedFmt != SECTOR_FMT_DEFLATE) {
302+
finish_(false, "Sector format must be ZLIB for entire file");
303+
return false;
304+
}
305+
break;
306+
}
307+
308+
return true;
309+
}
310+
279311
bool Output::ShouldCompress(int64_t pos, uint8_t *buffer) {
280312
if (flags_ & TASKFLAG_DECOMPRESS) {
281313
return false;
@@ -320,6 +352,20 @@ void Output::Flush() {
320352
return;
321353
}
322354

355+
switch (fmt_) {
356+
case CSO_FMT_CSO1:
357+
case CSO_FMT_CSO2:
358+
case CSO_FMT_ZSO:
359+
WriteCSOIndex();
360+
break;
361+
362+
case CSO_FMT_DAX:
363+
WriteDAXIndex();
364+
break;
365+
}
366+
}
367+
368+
void Output::WriteCSOIndex() {
323369
CSOHeader *header = new CSOHeader;
324370
if (fmt_ == CSO_FMT_ZSO) {
325371
memcpy(header->magic, ZSO_MAGIC, sizeof(header->magic));
@@ -352,6 +398,48 @@ void Output::Flush() {
352398
});
353399
}
354400

401+
void Output::WriteDAXIndex() {
402+
DAXHeader *header = new DAXHeader;
403+
memcpy(header->magic, DAX_MAGIC, sizeof(header->magic));
404+
header->uncompressed_size = static_cast<uint32_t>(srcSize_);
405+
// TODO: 0 because we don't support NC areas in writing currently.
406+
header->version = 0;
407+
header->nc_areas = 0;
408+
header->unused[0] = 0;
409+
header->unused[1] = 0;
410+
header->unused[2] = 0;
411+
header->unused[3] = 0;
412+
413+
const uint32_t sectors = static_cast<uint32_t>(SrcSizeAligned() >> blockShift_);
414+
uint16_t *sizes = new uint16_t[sectors];
415+
for (uint32_t i = 0; i < sectors; ++i) {
416+
uint32_t size = index_[i + 1] - index_[i];
417+
if (size < (1 << 16)) {
418+
sizes[i] = size;
419+
} else {
420+
finish_(false, "Compressed sector larger than 16 bits");
421+
}
422+
}
423+
424+
uv_buf_t bufs[3];
425+
bufs[0] = uv_buf_init(reinterpret_cast<char *>(header), sizeof(DAXHeader));
426+
// We skip the last entry of the index, which is the end.
427+
bufs[1] = uv_buf_init(reinterpret_cast<char *>(index_), sectors * sizeof(uint32_t));
428+
bufs[2] = uv_buf_init(reinterpret_cast<char *>(sizes), sectors * sizeof(uint16_t));
429+
const ssize_t totalBytes = sizeof(DAXHeader) + sectors * (sizeof(uint32_t) + sizeof(uint16_t));
430+
uv_.fs_write(loop_, &flush_, file_, bufs, 3, 0, [this, header, sizes, totalBytes](uv_fs_t *req) {
431+
if (req->result != totalBytes) {
432+
finish_(false, "Unable to write header data");
433+
} else {
434+
state_ |= STATE_INDEX_WRITTEN;
435+
CheckFinish();
436+
}
437+
uv_fs_req_cleanup(req);
438+
delete header;
439+
delete [] sizes;
440+
});
441+
}
442+
355443
void Output::CheckFinish() {
356444
if ((state_ & STATE_INDEX_WRITTEN) && (state_ & STATE_DATA_WRITTEN)) {
357445
finish_(true, nullptr);

src/output.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,16 @@ class Output {
2828
private:
2929
void CheckFinish();
3030
void Flush();
31-
int32_t Align(int64_t &pos);
31+
void WriteCSOIndex();
32+
void WriteDAXIndex();
3233
void HandleReadySector(Sector *sector);
3334
bool ShouldCompress(int64_t pos, uint8_t *buffer);
35+
36+
int32_t Align(int64_t &pos);
3437
inline int64_t SrcSizeAligned();
38+
int64_t DstFirstSectorPos(uint32_t totalSectors);
39+
40+
bool UpdateIndex(int64_t srcPos, int64_t dstPos, uint32_t compressedSize, SectorFormat compressedFmt);
3541

3642
enum State {
3743
STATE_INIT = 0x00,

src/sector.cpp

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
namespace maxcso {
1616

17-
static int InitZlib(z_stream *&z, int strategy) {
17+
static int InitZlib(z_stream *&z, int strategy, bool withHeader) {
1818
z = reinterpret_cast<z_stream *>(calloc(1, sizeof(z_stream)));
19-
return deflateInit2(z, 9, Z_DEFLATED, -15, 9, strategy);
19+
return deflateInit2(z, 9, Z_DEFLATED, withHeader ? 15 : -15, 9, strategy);
2020
}
2121

2222
static void EndZlib(z_stream *&z) {
@@ -30,12 +30,12 @@ Sector::Sector(uint32_t flags)
3030
compress_(true), readySize_(0), buffer_(nullptr), best_(nullptr) {
3131
// Set up the zlib streams, which we will reuse each time we hit this sector.
3232
if (!(flags_ & TASKFLAG_NO_ZLIB_DEFAULT)) {
33-
InitZlib(zDefault_, Z_DEFAULT_STRATEGY);
33+
InitZlib(zDefault_, Z_DEFAULT_STRATEGY, (flags_ & TASKFLAG_FMT_DAX) != 0);
3434
}
3535
if (!(flags_ & TASKFLAG_NO_ZLIB_BRUTE)) {
36-
InitZlib(zFiltered_, Z_FILTERED);
37-
InitZlib(zHuffman_, Z_HUFFMAN_ONLY);
38-
InitZlib(zRLE_, Z_RLE);
36+
InitZlib(zFiltered_, Z_FILTERED, (flags_ & TASKFLAG_FMT_DAX) != 0);
37+
InitZlib(zHuffman_, Z_HUFFMAN_ONLY, (flags_ & TASKFLAG_FMT_DAX) != 0);
38+
InitZlib(zRLE_, Z_RLE, (flags_ & TASKFLAG_FMT_DAX) != 0);
3939
}
4040

4141
#ifndef NO_DEFLATE7Z
@@ -47,6 +47,7 @@ Sector::Sector(uint32_t flags)
4747
opts.fastbytes = 64;
4848
opts.matchcycles = 32;
4949
opts.algo = 1;
50+
opts.useZlib = (flags_ & TASKFLAG_FMT_DAX) != 0;
5051
Deflate7z::Alloc(&deflate7z_, &opts);
5152
}
5253
#endif
@@ -128,10 +129,13 @@ void Sector::FinalizeBest(uint32_t align) {
128129
// If bestSize_ wouldn't be smaller after alignment, we should not compress.
129130
// It won't save space, and it'll waste CPU on the decompression side.
130131
if (AlignedBestSize(align) >= blockSize_ && best_ != nullptr) {
131-
pool.Release(best_);
132-
best_ = nullptr;
133-
bestSize_ = blockSize_;
134-
bestFmt_ = SECTOR_FMT_ORIG;
132+
// TODO: For DAX, we allow this, since we don't support NC areas yet.
133+
if (!(flags_ & TASKFLAG_FMT_DAX)) {
134+
pool.Release(best_);
135+
best_ = nullptr;
136+
bestSize_ = blockSize_;
137+
bestFmt_ = SECTOR_FMT_ORIG;
138+
}
135139
}
136140
}
137141

@@ -206,9 +210,10 @@ void Sector::ZopfliTrial() {
206210
// Also doesn't return failure?
207211
unsigned char *out = nullptr;
208212
size_t outsize = 0;
209-
ZopfliCompress(&opt, ZOPFLI_FORMAT_DEFLATE, buffer_, blockSize_, &out, &outsize);
213+
ZopfliFormat fmt = (flags_ & TASKFLAG_FMT_DAX) != 0 ? ZOPFLI_FORMAT_ZLIB : ZOPFLI_FORMAT_DEFLATE;
214+
ZopfliCompress(&opt, fmt, buffer_, blockSize_, &out, &outsize);
210215
if (out != nullptr) {
211-
if (outsize > 0 && outsize < static_cast<size_t>(bestSize_)) {
216+
if (outsize > 0 && outsize < static_cast<size_t>(pool.bufferSize)) {
212217
// So that we have proper release semantics, we copy to our buffer.
213218
uint8_t *result = pool.Alloc();
214219
memcpy(result, out, outsize);
@@ -258,6 +263,13 @@ void Sector::LZ4Trial() {
258263
bool Sector::SubmitTrial(uint8_t *result, uint32_t size, SectorFormat fmt) {
259264
bool better = size + origMaxCost_ < bestSize_;
260265

266+
if (flags_ & TASKFLAG_FMT_DAX) {
267+
// TODO: Until we support NC areas (but that means we have to rebuild output or compress all blocks first?)
268+
if (!better && bestFmt_ == SECTOR_FMT_ORIG) {
269+
better = true;
270+
}
271+
}
272+
261273
// Based on the old and new format, we may want to apply some fuzzing for lz4.
262274
if (fmt == SECTOR_FMT_LZ4 && bestFmt_ == SECTOR_FMT_DEFLATE) {
263275
// Allow lz4 to make it larger by a max cost.

0 commit comments

Comments
 (0)