Skip to content

Commit f64de31

Browse files
committed
Implement streaming compression for compressed ELF sections.
1 parent 1803d67 commit f64de31

File tree

4 files changed

+147
-14
lines changed

4 files changed

+147
-14
lines changed

llvm/include/llvm/Support/Compression.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
namespace llvm {
2020
template <typename T> class SmallVectorImpl;
2121
class Error;
22+
class raw_ostream;
2223

2324
// None indicates no compression. The other members are a subset of
2425
// compression::Format, which is used for compressed debug sections in some
@@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
4445
SmallVectorImpl<uint8_t> &CompressedBuffer,
4546
int Level = DefaultCompression);
4647

48+
// Streaming counterpart of zlib's compress(): deflates Input at the given
// Level and writes the compressed bytes to OS as they are produced, instead
// of collecting them in a caller-provided buffer.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
49+
int Level = DefaultCompression);
50+
4751
Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
4852
size_t &UncompressedSize);
4953

@@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
6569
SmallVectorImpl<uint8_t> &CompressedBuffer,
6670
int Level = DefaultCompression, bool EnableLdm = false);
6771

72+
// Streaming counterpart of zstd's compress(): compresses Input at the given
// Level (optionally with long distance matching when EnableLdm is set) and
// writes the compressed bytes to OS as they are produced, instead of
// collecting them in a caller-provided buffer.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
73+
int Level = DefaultCompression, bool EnableLdm = false);
74+
6875
Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
6976
size_t &UncompressedSize);
7077

@@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
116123
void compress(Params P, ArrayRef<uint8_t> Input,
117124
SmallVectorImpl<uint8_t> &Output);
118125

126+
// Compress Input into a raw_ostream, without buffering the entire compressed
127+
// output. Compression parameters are the same as for `compress`.
128+
void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);
129+
119130
// Decompress Input. The uncompressed size must be available.
120131
Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
121132
uint8_t *Output, size_t UncompressedSize);

llvm/lib/MC/ELFObjectWriter.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,7 @@ struct ELFWriter {
144144

145145
uint64_t align(Align Alignment);
146146

147-
bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
148-
SmallVectorImpl<uint8_t> &CompressedContents,
149-
Align Alignment);
147+
bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);
150148

151149
public:
152150
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -841,12 +839,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
841839
}
842840

843841
// Include the debug info compression header.
844-
bool ELFWriter::maybeWriteCompression(
845-
uint32_t ChType, uint64_t Size,
846-
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
847-
uint64_t HdrSize =
848-
is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
849-
if (Size <= HdrSize + CompressedContents.size())
842+
bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
843+
Align Alignment) {
844+
if (Size <= 128)
850845
return false;
851846
// Platform specific header is followed by compressed data.
852847
if (is64Bit()) {
@@ -897,10 +892,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
897892
ChType = ELF::ELFCOMPRESS_ZSTD;
898893
break;
899894
}
900-
compression::compress(compression::Params(CompressionType), Uncompressed,
901-
Compressed);
902-
if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
903-
Sec.getAlign())) {
895+
896+
if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
904897
W.OS << UncompressedData;
905898
return;
906899
}
@@ -909,7 +902,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
909902
// Alignment field should reflect the requirements of
910903
// the compressed section header.
911904
Section.setAlignment(is64Bit() ? Align(8) : Align(4));
912-
W.OS << toStringRef(Compressed);
905+
compression::compressToStream(compression::Params(CompressionType),
906+
Uncompressed, W.OS);
913907
}
914908

915909
void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,

llvm/lib/Support/Compression.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/Support/Compression.h"
14+
#include "llvm/ADT/ScopeExit.h"
1415
#include "llvm/ADT/SmallVector.h"
1516
#include "llvm/ADT/StringRef.h"
1617
#include "llvm/Config/config.h"
@@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
5556
}
5657
}
5758

59+
// Format-agnostic entry point for streaming compression: forwards Input and
// OS to the zlib or zstd implementation chosen by P.format, passing along the
// level (and, for zstd, the long-distance-matching flag) stored in P.
void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
                                   raw_ostream &OS) {
  switch (P.format) {
  case compression::Format::Zlib:
    return zlib::compressToStream(Input, OS, P.level);
  case compression::Format::Zstd:
    return zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
  }
}
70+
5871
Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
5972
uint8_t *Output, size_t UncompressedSize) {
6073
switch (formatFor(T)) {
@@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
120133
CompressedBuffer.truncate(CompressedSize);
121134
}
122135

136+
// Deflate Input at compression level Level and write the result to OS.
//
// The compressed bytes are produced into a small fixed-size scratch buffer
// that is flushed to OS after every deflate() call, so the whole compressed
// output is never held in memory at once.
//
// Failure to initialise the zlib stream is reported through
// report_bad_alloc_error; a Z_STREAM_ERROR or Z_BUF_ERROR from deflate() is
// reported through report_fatal_error.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  // Allocate a fixed size buffer to hold the output.
  constexpr size_t OutBufferSize = 4096;
  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);

  z_stream ZStream;
  ZStream.zalloc = Z_NULL;
  ZStream.zfree = Z_NULL;
  ZStream.opaque = Z_NULL;

  int ZErr = deflateInit(&ZStream, Level);
  if (ZErr != Z_OK)
    report_bad_alloc_error("Failed to create ZStream");

  // Ensure that the z_stream is cleaned up on all exit paths.
  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });

  // z_stream::avail_in is a uInt, which may be narrower than size_t. Assigning
  // Input.size() to it directly would silently truncate very large inputs, so
  // hand the input to zlib in chunks that are guaranteed to fit.
  constexpr size_t MaxChunkSize = static_cast<uInt>(-1);
  const uint8_t *NextChunk = Input.data();
  size_t BytesLeft = Input.size();

  ZStream.next_in = reinterpret_cast<Bytef *>(const_cast<uint8_t *>(NextChunk));
  ZStream.avail_in = 0;

  // Repeatedly deflate into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state.
  while (ZErr != Z_STREAM_END) {
    // Refill zlib's input window once it has consumed the previous chunk.
    if (ZStream.avail_in == 0 && BytesLeft != 0) {
      const size_t ChunkSize =
          BytesLeft < MaxChunkSize ? BytesLeft : MaxChunkSize;
      ZStream.next_in =
          reinterpret_cast<Bytef *>(const_cast<uint8_t *>(NextChunk));
      ZStream.avail_in = static_cast<uInt>(ChunkSize);
      NextChunk += ChunkSize;
      BytesLeft -= ChunkSize;
    }

    ZStream.next_out = OutBuffer.get();
    ZStream.avail_out = OutBufferSize;

    // Only ask zlib to finish the stream once every input byte has been
    // handed over; for inputs that fit in a single chunk this is the very
    // first call, exactly as before.
    ZErr = deflate(&ZStream, BytesLeft == 0 ? Z_FINISH : Z_NO_FLUSH);
    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
      report_fatal_error(convertZlibCodeToString(ZErr));

    const size_t BytesProduced = OutBufferSize - ZStream.avail_out;

    // Tell MemorySanitizer that zlib output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZLib.
    __msan_unpoison(OutBuffer.get(), BytesProduced);

    if (BytesProduced != 0)
      OS.write(reinterpret_cast<char *>(OutBuffer.get()), BytesProduced);
  }
}
178+
123179
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
124180
size_t &UncompressedSize) {
125181
int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
@@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
148204
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
149205
llvm_unreachable("zlib::compress is unavailable");
150206
}
207+
// Fallback definition that must never be reached; it sits alongside the other
// "zlib::... is unavailable" stubs (presumably the variant selected when zlib
// support is not compiled in -- the guard is outside this view).
//
// The default value for Level is supplied by the declaration in
// Compression.h and must not be repeated here: C++ forbids redefining a
// default argument in a later declaration, even with the same value.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  llvm_unreachable("zlib::compressToStream is unavailable");
}
151211
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
152212
size_t &UncompressedSize) {
153213
llvm_unreachable("zlib::decompress is unavailable");
@@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
201261
CompressedBuffer.truncate(CompressedSize);
202262
}
203263

264+
// Compress Input with zstd at compression level Level and write the result to
// OS. Long distance matching is switched on or off according to EnableLdm.
//
// The compressed bytes are produced into a scratch buffer of
// ZSTD_CStreamOutSize() bytes that is flushed to OS after every
// ZSTD_compressStream2() call, so the whole compressed output is never held
// in memory at once.
//
// Failure to create or configure the compression stream is reported through
// report_bad_alloc_error; an error from ZSTD_compressStream2() is reported
// through report_fatal_error.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
                            bool EnableLdm) {
  // Allocate a buffer to hold the output.
  size_t OutBufferSize = ZSTD_CStreamOutSize();
  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);

  ZSTD_CStream *CStream = ZSTD_createCStream();
  if (!CStream)
    report_bad_alloc_error("Failed to create ZSTD_CCtx");

  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
  auto FreeCStreamOnExit =
      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });

  if (ZSTD_isError(ZSTD_CCtx_setParameter(
          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
  }

  if (ZSTD_isError(
          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
  }

  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};

  // Repeatedly compress into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state: with ZSTD_e_end the call returns 0 once the frame is complete.
  size_t ZRet;
  do {
    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
    if (ZSTD_isError(ZRet))
      report_fatal_error(ZSTD_getErrorName(ZRet));

    // Tell MemorySanitizer that zstd output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZStd.
    __msan_unpoison(OutBuffer.get(), ZOutput.pos);

    if (ZOutput.pos > 0)
      OS.write(OutBuffer.get(), ZOutput.pos);
  } while (ZRet != 0);
}
308+
204309
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
205310
size_t &UncompressedSize) {
206311
const size_t Res = ::ZSTD_decompress(
@@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
231336
bool EnableLdm) {
232337
llvm_unreachable("zstd::compress is unavailable");
233338
}
339+
// Fallback definition that must never be reached; it sits alongside the other
// "zstd::... is unavailable" stubs (presumably the variant selected when zstd
// support is not compiled in -- the guard is outside this view).
//
// The default values for Level and EnableLdm are supplied by the declaration
// in Compression.h and must not be repeated here: C++ forbids redefining a
// default argument in a later declaration, even with the same value.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level, bool EnableLdm) {
  llvm_unreachable("zstd::compressToStream is unavailable");
}
234344
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
235345
size_t &UncompressedSize) {
236346
llvm_unreachable("zstd::decompress is unavailable");

llvm/unittests/Support/CompressionTest.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
2828
SmallVector<uint8_t, 0> Uncompressed;
2929
zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);
3030

31+
// Check that stream compression results are the same as bulk compression.
32+
SmallVector<char, 0> StreamCompressed;
33+
raw_svector_ostream Stream(StreamCompressed);
34+
zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
35+
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
36+
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
37+
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
38+
}
39+
3140
// Check that uncompressed buffer is the same as original.
3241
Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
3342
EXPECT_FALSE(std::move(E));
@@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
7382
SmallVector<uint8_t, 0> Uncompressed;
7483
zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
7584

85+
// Check that stream compression results are the same as bulk compression.
86+
SmallVector<char, 0> StreamCompressed;
87+
raw_svector_ostream Stream(StreamCompressed);
88+
zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
89+
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
90+
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
91+
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
92+
}
93+
7694
// Check that uncompressed buffer is the same as original.
7795
Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
7896
EXPECT_FALSE(std::move(E));

0 commit comments

Comments
 (0)