@@ -48,29 +48,13 @@ int VarintLength(uint64_t v);
4848char * EncodeVarint32 (char * dst, uint32_t value);
4949char * EncodeVarint64 (char * dst, uint64_t value);
5050
51- // TODO(costan): Remove port::kLittleEndian and the fast paths based on
52- // std::memcpy when clang learns to optimize the generic code, as
53- // described in https://bugs.llvm.org/show_bug.cgi?id=41761
54- //
55- // The platform-independent code in DecodeFixed{32,64}() gets optimized to mov
56- // on x86 and ldr on ARM64, by both clang and gcc. However, only gcc optimizes
57- // the platform-independent code in EncodeFixed{32,64}() to mov / str.
58-
5951// Lower-level versions of Put... that write directly into a character buffer
6052// REQUIRES: dst has enough space for the value being written
6153
6254inline void EncodeFixed32 (char * dst, uint32_t value) {
6355 uint8_t * const buffer = reinterpret_cast <uint8_t *>(dst);
6456
65- if (port::kLittleEndian ) {
66- // Fast path for little-endian CPUs. All major compilers optimize this to a
67- // single mov (x86_64) / str (ARM) instruction.
68- std::memcpy (buffer, &value, sizeof (uint32_t ));
69- return ;
70- }
71-
72- // Platform-independent code.
73- // Currently, only gcc optimizes this to a single mov / str instruction.
57+ // Recent clang and gcc optimize this to a single mov / str instruction.
7458 buffer[0 ] = static_cast <uint8_t >(value);
7559 buffer[1 ] = static_cast <uint8_t >(value >> 8 );
7660 buffer[2 ] = static_cast <uint8_t >(value >> 16 );
@@ -80,15 +64,7 @@ inline void EncodeFixed32(char* dst, uint32_t value) {
8064inline void EncodeFixed64 (char * dst, uint64_t value) {
8165 uint8_t * const buffer = reinterpret_cast <uint8_t *>(dst);
8266
83- if (port::kLittleEndian ) {
84- // Fast path for little-endian CPUs. All major compilers optimize this to a
85- // single mov (x86_64) / str (ARM) instruction.
86- std::memcpy (buffer, &value, sizeof (uint64_t ));
87- return ;
88- }
89-
90- // Platform-independent code.
91- // Currently, only gcc optimizes this to a single mov / str instruction.
67+ // Recent clang and gcc optimize this to a single mov / str instruction.
9268 buffer[0 ] = static_cast <uint8_t >(value);
9369 buffer[1 ] = static_cast <uint8_t >(value >> 8 );
9470 buffer[2 ] = static_cast <uint8_t >(value >> 16 );
@@ -105,16 +81,7 @@ inline void EncodeFixed64(char* dst, uint64_t value) {
10581inline uint32_t DecodeFixed32 (const char * ptr) {
10682 const uint8_t * const buffer = reinterpret_cast <const uint8_t *>(ptr);
10783
108- if (port::kLittleEndian ) {
109- // Fast path for little-endian CPUs. All major compilers optimize this to a
110- // single mov (x86_64) / ldr (ARM) instruction.
111- uint32_t result;
112- std::memcpy (&result, buffer, sizeof (uint32_t ));
113- return result;
114- }
115-
116- // Platform-independent code.
117- // Clang and gcc optimize this to a single mov / ldr instruction.
84+ // Recent clang and gcc optimize this to a single mov / ldr instruction.
11885 return (static_cast <uint32_t >(buffer[0 ])) |
11986 (static_cast <uint32_t >(buffer[1 ]) << 8 ) |
12087 (static_cast <uint32_t >(buffer[2 ]) << 16 ) |
@@ -124,16 +91,7 @@ inline uint32_t DecodeFixed32(const char* ptr) {
12491inline uint64_t DecodeFixed64 (const char * ptr) {
12592 const uint8_t * const buffer = reinterpret_cast <const uint8_t *>(ptr);
12693
127- if (port::kLittleEndian ) {
128- // Fast path for little-endian CPUs. All major compilers optimize this to a
129- // single mov (x86_64) / ldr (ARM) instruction.
130- uint64_t result;
131- std::memcpy (&result, buffer, sizeof (uint64_t ));
132- return result;
133- }
134-
135- // Platform-independent code.
136- // Clang and gcc optimize this to a single mov / ldr instruction.
94+ // Recent clang and gcc optimize this to a single mov / ldr instruction.
13795 return (static_cast <uint64_t >(buffer[0 ])) |
13896 (static_cast <uint64_t >(buffer[1 ]) << 8 ) |
13997 (static_cast <uint64_t >(buffer[2 ]) << 16 ) |
0 commit comments