diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356..09457df95250c 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -19,6 +19,7 @@
 namespace llvm {
 template <typename T> class SmallVectorImpl;
 class Error;
+class raw_ostream;
 
 // None indicates no compression. The other members are a subset of
 // compression::Format, which is used for compressed debug sections in some
@@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression, bool EnableLdm = false);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression, bool EnableLdm = false);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
 void compress(Params P, ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &Output);
 
+// Compress Input into a raw_ostream, without buffering the entire compressed
+// output. Compression parameters are the same as for `compress`.
+void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);
+
 // Decompress Input. The uncompressed size must be available.
 Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                  uint8_t *Output, size_t UncompressedSize);
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 005521bad6e01..bb0b2b4e0cfa7 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -144,9 +144,7 @@ struct ELFWriter {
 
   uint64_t align(Align Alignment);
 
-  bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
-                             SmallVectorImpl<uint8_t> &CompressedContents,
-                             Align Alignment);
+  bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);
 
 public:
   ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -841,12 +839,11 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
 }
 
 // Include the debug info compression header.
-bool ELFWriter::maybeWriteCompression(
-    uint32_t ChType, uint64_t Size,
-    SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
-  uint64_t HdrSize =
-      is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
-  if (Size <= HdrSize + CompressedContents.size())
+bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
+                                      Align Alignment) {
+  // The compressed payload is streamed after this header, so its size is not
+  // known yet; fall back to a fixed cutoff on the uncompressed size.
+  if (Size <= 128)
     return false;
   // Platform specific header is followed by compressed data.
   if (is64Bit()) {
@@ -897,10 +894,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     ChType = ELF::ELFCOMPRESS_ZSTD;
     break;
   }
-  compression::compress(compression::Params(CompressionType), Uncompressed,
-                        Compressed);
-  if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
-                             Sec.getAlign())) {
+
+  if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
     W.OS << UncompressedData;
     return;
   }
@@ -909,7 +904,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
   // Alignment field should reflect the requirements of
   // the compressed section header.
   Section.setAlignment(is64Bit() ? Align(8) : Align(4));
-  W.OS << toStringRef(Compressed);
+  compression::compressToStream(compression::Params(CompressionType),
+                                Uncompressed, W.OS);
 }
 
 void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd..2c5380bd4e22c 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Compression.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
@@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
   }
 }
 
+void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
+                                   raw_ostream &OS) {
+  switch (P.format) {
+  case compression::Format::Zlib:
+    zlib::compressToStream(Input, OS, P.level);
+    break;
+  case compression::Format::Zstd:
+    zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
+    break;
+  }
+}
+
 Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                               uint8_t *Output, size_t UncompressedSize) {
   switch (formatFor(T)) {
@@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  // Allocate a fixed size buffer to hold the output.
+  constexpr size_t OutBufferSize = 4096;
+  auto OutBuffer = std::make_unique<uint8_t[]>(OutBufferSize);
+
+  z_stream ZStream;
+  ZStream.zalloc = Z_NULL;
+  ZStream.zfree = Z_NULL;
+  ZStream.opaque = Z_NULL;
+
+  int ZErr = deflateInit(&ZStream, Level);
+  if (ZErr != Z_OK)
+    report_bad_alloc_error("Failed to create ZStream");
+
+  // Ensure that the z_stream is cleaned up on all exit paths.
+  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });
+
+  ZStream.next_in =
+      reinterpret_cast<Bytef *>(const_cast<uint8_t *>(Input.data()));
+  ZStream.avail_in = Input.size();
+
+  // Repeatedly deflate into the output buffer and flush it into the
+  // output stream. Repeat until we have drained the entire compression
+  // state.
+  while (ZErr != Z_STREAM_END) {
+    ZStream.next_out = OutBuffer.get();
+    ZStream.avail_out = OutBufferSize;
+
+    ZErr = deflate(&ZStream, Z_FINISH);
+    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
+      report_fatal_error(convertZlibCodeToString(ZErr));
+
+    // Tell MemorySanitizer that the bytes zlib just produced are initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZLib.
+    __msan_unpoison(OutBuffer.get(), OutBufferSize - ZStream.avail_out);
+
+    if (ZStream.avail_out < OutBufferSize)
+      OS.write(reinterpret_cast<char *>(OutBuffer.get()),
+               OutBufferSize - ZStream.avail_out);
+  }
+}
+
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
@@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
   llvm_unreachable("zlib::compress is unavailable");
 }
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  llvm_unreachable("zlib::compressToStream is unavailable");
+}
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
                        size_t &UncompressedSize) {
   llvm_unreachable("zlib::decompress is unavailable");
@@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  // Allocate a buffer to hold the output.
+  size_t OutBufferSize = ZSTD_CStreamOutSize();
+  auto OutBuffer = std::make_unique<uint8_t[]>(OutBufferSize);
+
+  ZSTD_CStream *CStream = ZSTD_createCStream();
+  if (!CStream)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
+  auto FreeCStreamOnExit =
+      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};
+
+  // Repeatedly compress into the output buffer and flush it into the
+  // output stream. Repeat until we have drained the entire compression
+  // state.
+  size_t ZRet;
+  do {
+    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
+    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
+    if (ZSTD_isError(ZRet))
+      report_fatal_error(ZSTD_getErrorName(ZRet));
+
+    // Tell MemorySanitizer that the bytes zstd just produced are initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZStd.
+    __msan_unpoison(OutBuffer.get(), ZOutput.pos);
+
+    if (ZOutput.pos > 0)
+      OS.write(reinterpret_cast<char *>(OutBuffer.get()), ZOutput.pos);
+  } while (ZRet != 0);
+}
+
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   const size_t Res = ::ZSTD_decompress(
@@ -231,6 +336,10 @@ void zstd::compress(ArrayRef<uint8_t> Input,
                     bool EnableLdm) {
   llvm_unreachable("zstd::compress is unavailable");
 }
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level, bool EnableLdm) {
+  llvm_unreachable("zstd::compressToStream is unavailable");
+}
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   llvm_unreachable("zstd::decompress is unavailable");
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1..ead1007404a5f 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  ASSERT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));
@@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  ASSERT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));