Skip to content

Commit f0e6be1

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5-bogner
1 parent 8628ff3 commit f0e6be1

File tree

3 files changed

+206
-1
lines changed

3 files changed

+206
-1
lines changed

llvm/include/llvm/Support/LEB128.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,34 @@ LLVM_ABI extern unsigned getULEB128Size(uint64_t Value);
252252
/// Utility function to get the size of the SLEB128-encoded value.
253253
LLVM_ABI extern unsigned getSLEB128Size(int64_t Value);
254254

255+
// Unsigned Counted LEB128: A variant of LEB128 where the length information is
256+
// determined by counting trailing zero bits in the first byte. Specifically, if
257+
// the first byte has n-1 trailing zeros, then the encoded integer occupies n
258+
// bytes total. The special case of a zero first byte signals a 9-byte encoding.
259+
//
260+
// The remaining bits in the first byte, plus all subsequent bytes, contain the
261+
// actual value in little-endian order.
262+
263+
// clang-format off
264+
// xxxxxxx1: 7 value bits, 1 byte
265+
// xxxxxx10 xxxxxxxx: 14 value bits, 2 bytes
266+
// xxxxx100 xxxxxxxx xxxxxxxx: 21 value bits, 3 bytes
267+
// xxxx1000 xxxxxxxx xxxxxxxx xxxxxxxx: 28 value bits, 4 bytes
268+
// xxx10000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx: 35 value bits, 5 bytes
269+
// xx100000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx: 42 value bits, 6 bytes
270+
// x1000000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx: 49 value bits, 7 bytes
271+
// 10000000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx: 56 value bits, 8 bytes
272+
//
273+
// 00000000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx: 64 value bits, 9 bytes
274+
// In a canonical (shortest-form) encoding the last byte is never 0.
275+
// clang-format on
276+
/// Writes the Unsigned Counted LEB128 encoding of \p x to \p os, using the
/// shortest of the 1- to 9-byte forms that can hold the value.
LLVM_ABI void encodeUCLeb128(uint64_t x, raw_ostream &os);
277+
/// Decodes one Unsigned Counted LEB128 value starting at \p p and advances
/// \p p past it. If the range [p, end) is empty or shorter than the length
/// announced by the first byte, returns 0 and leaves \p p unchanged.
LLVM_ABI uint64_t getUCLeb128(const uint8_t *&p, const uint8_t *end);
278+
/// Same as getUCLeb128 but performs no bounds checking: the caller must
/// guarantee that the complete encoding is readable at \p p.
LLVM_ABI uint64_t getUCLeb128Unsafe(const uint8_t *&p);
279+
280+
// Note: If we introduce a signed version of CLEB128, we should use sign extension
281+
// instead of zig-zag encoding. Sign extension actually generates faster code.
282+
255283
} // namespace llvm
256284

257285
#endif // LLVM_SUPPORT_LEB128_H

llvm/lib/Support/LEB128.cpp

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/Support/LEB128.h"
15+
#include "llvm/ADT/bit.h"
16+
#include "llvm/Support/Endian.h"
17+
#include "llvm/Support/EndianStream.h"
18+
#include "llvm/Support/raw_ostream.h"
19+
20+
using namespace llvm;
21+
using namespace llvm::support;
1522

1623
namespace llvm {
1724

@@ -39,5 +46,89 @@ unsigned getSLEB128Size(int64_t Value) {
3946
} while (IsMore);
4047
return Size;
4148
}
42-
4349
} // namespace llvm
50+
51+
void llvm::encodeUCLeb128(uint64_t x, raw_ostream &os) {
52+
// Fast path for n == 1
53+
if (x < 128) {
54+
os.write((x << 1) | 1);
55+
return;
56+
}
57+
58+
unsigned significantBits = 64 - countl_zero(x);
59+
unsigned n = (significantBits + 6) / 7;
60+
if (n > 8) {
61+
// 9 bytes: 00000000 xxxxxxxx ...
62+
os.write(0);
63+
endian::write(os, x, endianness::little);
64+
return;
65+
}
66+
67+
uint64_t tagged = endian::byte_swap((x << n) | ((uint64_t)1 << (n - 1)),
68+
endianness::little);
69+
os.write((const char *)&tagged, n);
70+
}
71+
72+
/// Decodes the payload of an n-byte Unsigned Counted LEB128 encoding
/// (1 <= n <= 8) whose first byte, \p byte, is the byte at p[0].
///
/// The upper 8-n bits of \p byte are the least significant value bits; bytes
/// p[1] .. p[n-1] follow in little-endian order. Advances \p p by n and
/// returns the decoded value. No bounds checking is performed here.
template <int n>
static inline uint64_t getUCLeb128Case(const uint8_t *&p, uint8_t byte) {
  // Outside this range the shift amounts below would be negative or the
  // value would not fit in 64 bits.
  static_assert(n >= 1 && n <= 8, "counted LEB128 case length must be 1..8");
  uint64_t val = static_cast<uint64_t>(byte) >> n;
  unsigned shift = 8 - n;
  for (int i = 1; i < n; ++i) {
    val |= static_cast<uint64_t>(p[i]) << shift;
    shift += 8;
  }
  p += n;
  return val;
}
83+
84+
template <bool CheckBounds>
85+
static uint64_t getUCLeb128Impl(const uint8_t *&p, const uint8_t *end) {
86+
if constexpr (CheckBounds) {
87+
if (p >= end)
88+
return 0;
89+
}
90+
// Fast path for n == 1
91+
uint8_t b0 = p[0];
92+
if (b0 & 1) {
93+
++p;
94+
return b0 >> 1;
95+
}
96+
97+
unsigned n = llvm::countr_zero(b0) + 1;
98+
if constexpr (CheckBounds) {
99+
if (end - p < n)
100+
return 0;
101+
}
102+
// Note: If n < 9 and we allow out-of-bounds read, we can use read64le(p) <<
103+
// (64-8*n) >> (64-7*n) instead of the following switch statement.
104+
switch (n) {
105+
case 1:
106+
return getUCLeb128Case<1>(p, b0);
107+
case 2:
108+
return getUCLeb128Case<2>(p, b0);
109+
case 3:
110+
return getUCLeb128Case<3>(p, b0);
111+
case 4:
112+
return getUCLeb128Case<4>(p, b0);
113+
case 5:
114+
return getUCLeb128Case<5>(p, b0);
115+
case 6:
116+
return getUCLeb128Case<6>(p, b0);
117+
case 7:
118+
return getUCLeb128Case<7>(p, b0);
119+
case 8:
120+
return getUCLeb128Case<8>(p, b0);
121+
default:
122+
// 9 bytes: 00000000 xxxxxxxx ...
123+
p += 9;
124+
return endian::read64le(p - 8);
125+
}
126+
}
127+
128+
uint64_t llvm::getUCLeb128(const uint8_t *&p, const uint8_t *end) {
129+
return getUCLeb128Impl<true>(p, end);
130+
}
131+
132+
uint64_t llvm::getUCLeb128Unsafe(const uint8_t *&p) {
133+
return getUCLeb128Impl<false>(p, nullptr);
134+
}

llvm/unittests/Support/LEB128Test.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,4 +474,90 @@ TEST(LEB128Test, ULEB128Size) {
474474
EXPECT_EQ(10u, getULEB128Size(UINT64_MAX));
475475
}
476476

477+
// Decoder tests: known encodings are decoded with both the bounds-checked and
// the unchecked entry point, then truncated inputs are fed to the checked one.
TEST(CLeb128Test, get) {
// VALUE is a string literal holding the encoded bytes; its implicit NUL
// terminator is excluded from the readable range via `sizeof(VALUE) - 1`.
#define EXPECT_CLEB128(VALUE, EXPECTED, SIZE)                                  \
  do {                                                                         \
    const uint8_t *V = reinterpret_cast<const uint8_t *>(VALUE);               \
    const uint8_t *P = V;                                                      \
    const uint8_t *End = V + sizeof(VALUE) - 1;                                \
    uint64_t Result = getUCLeb128(P, End);                                     \
    EXPECT_EQ(Result, EXPECTED);                                               \
    EXPECT_EQ(P - V, SIZE);                                                    \
    P = V;                                                                     \
    Result = getUCLeb128Unsafe(P);                                             \
    EXPECT_EQ(Result, EXPECTED);                                               \
    EXPECT_EQ(P - V, SIZE);                                                    \
  } while (0)

  // Fast path: single byte with LSB = 1 (value = byte >> 1)
  EXPECT_CLEB128("\x01", 0u, 1);
  EXPECT_CLEB128("\x7f", 63u, 1);
  EXPECT_CLEB128("\xff", 127u, 1);
  EXPECT_CLEB128("\x02\x02", 128u, 2);
  // Non-canonical 9-byte form of a small value still decodes.
  EXPECT_CLEB128("\x00\x00\x01\x00\x00\x00\x00\x00\x00", 256u, 9);

  // Test (1<<56)-2 in its 8-byte form and in the (non-canonical) 9-byte form.
  EXPECT_CLEB128("\x80\xfe\xff\xff\xff\xff\xff\xff", 0xfffffffffffffeu, 8);
  EXPECT_CLEB128("\x00\xfe\xff\xff\xff\xff\xff\xff\x00", 0xfffffffffffffeu, 9);

#undef EXPECT_CLEB128

  // Test bounds checking in safe version
  {
    const uint8_t data[] = {0x02, 0x02}; // 2-byte encoding for 128

    // Insufficient buffer (should return 0 and not move the cursor)
    const uint8_t *p = data;
    EXPECT_EQ(getUCLeb128(p, data + 1), 0u);
    EXPECT_EQ(p, data);

    // Empty buffer
    p = data;
    EXPECT_EQ(getUCLeb128(p, data), 0u);
    EXPECT_EQ(p, data);
  }

  // Test 9-byte format bounds checking
  {
    const uint8_t data[] = {0x00, 0x01, 0x02, 0x03, 0x04,
                            0x05, 0x06, 0x07, 0x08, 0x09};
    const uint8_t *p = data;

    // Sufficient buffer for 9-byte format: exactly nine bytes are consumed
    // even though a tenth byte is available.
    EXPECT_EQ(getUCLeb128(p, data + 10), 0x0807060504030201ULL);
    EXPECT_EQ(p, data + 9);

    // Insufficient buffer for 9-byte format: the cursor must not move.
    p = data;
    EXPECT_EQ(getUCLeb128(p, data + 8), 0u);
    EXPECT_EQ(p, data);
  }
}
535+
536+
// Encoder tests: every value is encoded, its encoded length is checked, and
// it is decoded again with both the unchecked and the bounds-checked decoder.
TEST(CLeb128Test, encode) {
  // Values on both sides of the encoding-length boundaries, with the number
  // of bytes the encoder is expected to produce for each.
  const struct {
    uint64_t Val;
    unsigned Size;
  } Cases[] = {
      {0, 1},
      {127, 1},
      {128, 2},
      {(1ULL << 14) - 1, 2},
      {(1ULL << 14) + 2, 3},
      {(1ULL << 21) + 3, 4},
      {(1ULL << 28) + 4, 5},
      {(1ULL << 35) + 5, 6},
      {(1ULL << 42) + 6, 7},
      {(1ULL << 49) + 7, 8},
      {(1ULL << 56) - 1, 8},
      {1ULL << 56, 9},
      {UINT64_MAX / 2, 9},
      {UINT64_MAX - 1, 9},
      {UINT64_MAX, 9},
  };
  for (const auto &C : Cases) {
    std::string encoded;
    raw_string_ostream os(encoded);
    encodeUCLeb128(C.Val, os);
    EXPECT_EQ(encoded.size(), C.Size)
        << "Unexpected encoded length for value " << C.Val;

    const uint8_t *p0 = reinterpret_cast<const uint8_t *>(encoded.data());
    const uint8_t *pEnd = p0 + encoded.size();

    // Unchecked decoder. Comparing pointers rather than `p - p0` against
    // size() avoids a signed/unsigned comparison.
    const uint8_t *p = p0;
    uint64_t decoded = getUCLeb128Unsafe(p);
    EXPECT_EQ(C.Val, decoded) << "Round-trip failed for value " << C.Val;
    EXPECT_EQ(p, pEnd);

    // Bounds-checked decoder with an exactly-sized buffer.
    p = p0;
    decoded = getUCLeb128(p, pEnd);
    EXPECT_EQ(C.Val, decoded) << "Checked round-trip failed for value "
                              << C.Val;
    EXPECT_EQ(p, pEnd);
  }
}
562+
477563
} // anonymous namespace

0 commit comments

Comments
 (0)