Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/include/llvm/Support/Compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
namespace llvm {
template <typename T> class SmallVectorImpl;
class Error;
class raw_ostream;

// None indicates no compression. The other members are a subset of
// compression::Format, which is used for compressed debug sections in some
Expand All @@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression);

// Compress Input with zlib and write the compressed bytes to OS as they are
// produced, instead of collecting them in a caller-supplied buffer first.
// Level is the zlib compression level.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
int Level = DefaultCompression);

Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);

Expand All @@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression, bool EnableLdm = false);

// Compress Input with zstd and write the compressed bytes to OS as they are
// produced, instead of collecting them in a caller-supplied buffer first.
// Level is the zstd compression level; EnableLdm turns on zstd's long
// distance matching.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
int Level = DefaultCompression, bool EnableLdm = false);

Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);

Expand Down Expand Up @@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
void compress(Params P, ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &Output);

// Compress Input into a raw_ostream, without buffering the entire compressed
// output. Compression parameters are the same as for `compress`. The format
// selected by P decides whether the zlib or the zstd implementation is used.
void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);

// Decompress Input. The uncompressed size must be available.
Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize);
Expand Down
22 changes: 8 additions & 14 deletions llvm/lib/MC/ELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,7 @@ struct ELFWriter {

uint64_t align(Align Alignment);

bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents,
Align Alignment);
bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);

public:
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
Expand Down Expand Up @@ -841,12 +839,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
}

// Include the debug info compression header.
bool ELFWriter::maybeWriteCompression(
uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
uint64_t HdrSize =
is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
if (Size <= HdrSize + CompressedContents.size())
bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
Align Alignment) {
if (Size <= 128)
return false;
// Platform specific header is followed by compressed data.
if (is64Bit()) {
Expand Down Expand Up @@ -897,10 +892,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
ChType = ELF::ELFCOMPRESS_ZSTD;
break;
}
compression::compress(compression::Params(CompressionType), Uncompressed,
Compressed);
if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
Sec.getAlign())) {

if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
W.OS << UncompressedData;
return;
}
Expand All @@ -909,7 +902,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
// Alignment field should reflect the requirements of
// the compressed section header.
Section.setAlignment(is64Bit() ? Align(8) : Align(4));
W.OS << toStringRef(Compressed);
compression::compressToStream(compression::Params(CompressionType),
Uncompressed, W.OS);
}

void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Support/Compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/Compression.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
Expand Down Expand Up @@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
}
}

// Streams the compressed form of Input into OS using the backend selected by
// P.format. Zlib receives only the level; zstd additionally receives the
// long-distance-matching flag carried in P.
void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
                                   raw_ostream &OS) {
  const compression::Format SelectedFormat = P.format;

  if (SelectedFormat == compression::Format::Zlib) {
    zlib::compressToStream(Input, OS, P.level);
    return;
  }

  if (SelectedFormat == compression::Format::Zstd)
    zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
}

Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize) {
switch (formatFor(T)) {
Expand Down Expand Up @@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}

// Compress Input with zlib and stream the result into OS.
//
// The compressed bytes are produced into a small fixed-size staging buffer
// and forwarded to OS chunk by chunk, so the complete compressed output is
// never held in memory at once.
//
//   Input  bytes to compress; handed to zlib in one piece.
//   OS     stream that receives the compressed bytes, in order.
//   Level  compression level passed to deflateInit.
//
// No error is returned: a failed deflateInit is reported through
// report_bad_alloc_error, and a Z_STREAM_ERROR / Z_BUF_ERROR from deflate
// through report_fatal_error.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  // Allocate a fixed size buffer to hold the output.
  constexpr size_t OutBufferSize = 4096;
  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);

  // The allocator hooks must be set before deflateInit is called.
  z_stream ZStream;
  ZStream.zalloc = Z_NULL;
  ZStream.zfree = Z_NULL;
  ZStream.opaque = Z_NULL;

  int ZErr = deflateInit(&ZStream, Level);
  if (ZErr != Z_OK)
    report_bad_alloc_error("Failed to create ZStream");

  // Ensure that the z_stream is cleaned up on all exit paths.
  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });

  // z_stream::next_in is a non-const pointer, hence the const_cast.
  ZStream.next_in =
      reinterpret_cast<Bytef *>(const_cast<uint8_t *>(Input.data()));
  // NOTE(review): avail_in is zlib's uInt, which is presumably narrower than
  // size_t on 64-bit hosts; confirm callers never pass inputs that large.
  ZStream.avail_in = Input.size();

  // Repeatedly deflate into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state.
  while (ZErr != Z_STREAM_END) {
    ZStream.next_out = OutBuffer.get();
    ZStream.avail_out = OutBufferSize;

    // TODO(review, MaskRay, Sep 26, 2024): Z_FINISH works, but the reviewer
    // reports that it harms the compression ratio and recommends Z_SYNC_FLUSH
    // for intermediate streams and Z_FINISH only for the last stream (see
    // deflateShard in lld/ELF/OutputSections.cpp). With appropriate API calls
    // the Z_STREAM_ERROR / Z_BUF_ERROR check below could then be avoided.
    ZErr = deflate(&ZStream, Z_FINISH);
    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
      report_fatal_error(convertZlibCodeToString(ZErr));

    // Number of bytes deflate just wrote into the staging buffer.
    const size_t Produced = OutBufferSize - ZStream.avail_out;

    // Tell MemorySanitizer that zlib output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZLib.
    // The buffer is OutBuffer (a unique_ptr); the earlier spelling
    // OutputBuffer.data() named a variable that does not exist.
    __msan_unpoison(OutBuffer.get(), Produced);

    if (Produced > 0)
      OS.write(reinterpret_cast<char *>(OutBuffer.get()), Produced);
  }
}

Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
Expand Down Expand Up @@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
// Stand-in used when LLVM is built without zlib; it must never be reached.
// The default for Level is supplied by the declaration in Compression.h, and
// a default argument may not be repeated on the out-of-line definition.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  llvm_unreachable("zlib::compressToStream is unavailable");
}
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
size_t &UncompressedSize) {
llvm_unreachable("zlib::decompress is unavailable");
Expand Down Expand Up @@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}

// Compress Input with zstd and stream the result into OS.
//
// The compressed bytes are produced into a staging buffer of
// ZSTD_CStreamOutSize() bytes and forwarded to OS chunk by chunk, so the
// complete compressed output is never held in memory at once.
//
//   Input      bytes to compress; handed to zstd in one piece.
//   OS         stream that receives the compressed bytes, in order.
//   Level      value set for ZSTD_c_compressionLevel.
//   EnableLdm  value set for ZSTD_c_enableLongDistanceMatching.
//
// No error is returned: failure to create or configure the stream is reported
// through report_bad_alloc_error, and a compression error through
// report_fatal_error.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
                            bool EnableLdm) {
  // Allocate a buffer to hold the output.
  // TODO(review): a reviewer points to lld/ELF/OutputSections.cpp (around
  // line 395 at review time) as an example of simpler zstd API usage.
  const size_t OutBufferSize = ZSTD_CStreamOutSize();
  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);

  ZSTD_CStream *CStream = ZSTD_createCStream();
  if (!CStream)
    report_bad_alloc_error("Failed to create ZSTD_CCtx");

  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
  auto FreeCStreamOnExit =
      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });

  if (ZSTD_isError(ZSTD_CCtx_setParameter(
          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
  }

  if (ZSTD_isError(
          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
  }

  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};

  // Repeatedly compress into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state.
  size_t ZRet;
  do {
    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
    // TODO(review): a reviewer notes that with suitable input and output
    // buffers ZSTD_compressStream2 never fails, so an assert on
    // !ZSTD_isError would be sufficient here.
    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
    if (ZSTD_isError(ZRet))
      report_fatal_error(ZSTD_getErrorName(ZRet));

    // Tell MemorySanitizer that zstd output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZStd.
    // The buffer is OutBuffer (a unique_ptr); the earlier spelling
    // OutputBuffer.data() named a variable that does not exist.
    __msan_unpoison(OutBuffer.get(), ZOutput.pos);

    // OutBuffer already holds char, so no cast is needed for the write.
    if (ZOutput.pos > 0)
      OS.write(OutBuffer.get(), ZOutput.pos);
  } while (ZRet != 0);
}

Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
const size_t Res = ::ZSTD_decompress(
Expand Down Expand Up @@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
bool EnableLdm) {
llvm_unreachable("zstd::compress is unavailable");
}
// Stand-in used when LLVM is built without zstd; it must never be reached.
// The defaults for Level and EnableLdm are supplied by the declaration in
// Compression.h, and default arguments may not be repeated on the out-of-line
// definition.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level, bool EnableLdm) {
  llvm_unreachable("zstd::compressToStream is unavailable");
}
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
llvm_unreachable("zstd::decompress is unavailable");
Expand Down
18 changes: 18 additions & 0 deletions llvm/unittests/Support/CompressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);

// Check that stream compression results are the same as bulk compression.
SmallVector<char, 0> StreamCompressed;
raw_svector_ostream Stream(StreamCompressed);
zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
}

// Check that uncompressed buffer is the same as original.
Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
Expand Down Expand Up @@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);

// Check that stream compression results are the same as bulk compression.
SmallVector<char, 0> StreamCompressed;
raw_svector_ostream Stream(StreamCompressed);
zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
}

// Check that uncompressed buffer is the same as original.
Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
Expand Down