Skip to content

Commit c4c6a65

Browse files
authored
Merge pull request Tencent#1548 from TranslucentTB/master
Fix ARM NEON under MSVC
2 parents bb5f966 + e54aca7 commit c4c6a65

File tree

5 files changed

+108
-42
lines changed

5 files changed

+108
-42
lines changed

include/rapidjson/internal/clzll.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Tencent is pleased to support the open source community by making RapidJSON available.
2+
//
3+
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4+
//
5+
// Licensed under the MIT License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// http://opensource.org/licenses/MIT
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#ifndef RAPIDJSON_CLZLL_H_
16+
#define RAPIDJSON_CLZLL_H_
17+
18+
#include "../rapidjson.h"
19+
20+
#if defined(_MSC_VER)
21+
#include <intrin.h>
22+
#if defined(_WIN64)
23+
#pragma intrinsic(_BitScanReverse64)
24+
#else
25+
#pragma intrinsic(_BitScanReverse)
26+
#endif
27+
#endif
28+
29+
RAPIDJSON_NAMESPACE_BEGIN
30+
namespace internal {
31+
32+
#if (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
33+
#define RAPIDJSON_CLZLL __builtin_clzll
34+
#else
35+
36+
//! Count the leading zero bits of a 64-bit unsigned integer.
/*!
    Fallback used when the compiler does not provide __builtin_clzll.

    \param x Value to inspect. Must be non-zero (checked by RAPIDJSON_ASSERT).
    \return Number of zero bits above the most significant set bit of \c x,
            in the range [0, 63].
*/
inline uint32_t clzll(uint64_t x) {
    // Passing 0 to __builtin_clzll is UB in GCC and results in an
    // infinite loop in the software implementation.
    RAPIDJSON_ASSERT(x != 0);

#if defined(_MSC_VER)
    unsigned long r = 0;
#if defined(_WIN64)
    _BitScanReverse64(&r, x);
#else
    // Scan the high 32 bits. A non-zero result means the highest set bit
    // lives in the upper half, so offset the reported index by 32.
    if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
        return 63 - (r + 32);

    // Scan the low 32 bits.
    _BitScanReverse(&r, static_cast<uint32_t>(x & 0xFFFFFFFF));
#endif // _WIN64

    // r is used as the bit index of the highest set bit; convert it to a
    // leading-zero count.
    return 63 - r;
#else
    // Portable version: shift left until the top bit is set, counting the
    // shifts. The counter must start at 0; reading it uninitialized is
    // undefined behavior and yields an arbitrary result.
    uint32_t r = 0;
    while (!(x & (static_cast<uint64_t>(1) << 63))) {
        x <<= 1;
        ++r;
    }

    return r;
#endif // _MSC_VER
}
65+
66+
#define RAPIDJSON_CLZLL RAPIDJSON_NAMESPACE::internal::clzll
67+
#endif // (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
68+
69+
} // namespace internal
70+
RAPIDJSON_NAMESPACE_END
71+
72+
#endif // RAPIDJSON_CLZLL_H_

include/rapidjson/internal/diyfp.h

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
#define RAPIDJSON_DIYFP_H_
2121

2222
#include "../rapidjson.h"
23+
#include "clzll.h"
2324
#include <limits>
2425

2526
#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
2627
#include <intrin.h>
27-
#pragma intrinsic(_BitScanReverse64)
2828
#pragma intrinsic(_umul128)
2929
#endif
3030

@@ -100,22 +100,8 @@ struct DiyFp {
100100
}
101101

102102
DiyFp Normalize() const {
103-
RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
104-
#if defined(_MSC_VER) && defined(_M_AMD64)
105-
unsigned long index;
106-
_BitScanReverse64(&index, f);
107-
return DiyFp(f << (63 - index), e - (63 - index));
108-
#elif defined(__GNUC__) && __GNUC__ >= 4
109-
int s = __builtin_clzll(f);
103+
int s = static_cast<int>(RAPIDJSON_CLZLL(f));
110104
return DiyFp(f << s, e - s);
111-
#else
112-
DiyFp res = *this;
113-
while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
114-
res.f <<= 1;
115-
res.e--;
116-
}
117-
return res;
118-
#endif
119105
}
120106

121107
DiyFp NormalizeBoundary() const {

include/rapidjson/rapidjson.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,12 @@ RAPIDJSON_NAMESPACE_END
490490
#define RAPIDJSON_VERSION_CODE(x,y,z) \
491491
(((x)*100000) + ((y)*100) + (z))
492492

493+
#if defined(__has_builtin)
494+
#define RAPIDJSON_HAS_BUILTIN(x) __has_builtin(x)
495+
#else
496+
#define RAPIDJSON_HAS_BUILTIN(x) 0
497+
#endif
498+
493499
///////////////////////////////////////////////////////////////////////////////
494500
// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF
495501

include/rapidjson/reader.h

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "allocators.h"
2121
#include "stream.h"
2222
#include "encodedstream.h"
23+
#include "internal/clzll.h"
2324
#include "internal/meta.h"
2425
#include "internal/stack.h"
2526
#include "internal/strtod.h"
@@ -443,16 +444,16 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
443444

444445
x = vmvnq_u8(x); // Negate
445446
x = vrev64q_u8(x); // Rev in 64
446-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
447-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
447+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
448+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
448449

449450
if (low == 0) {
450451
if (high != 0) {
451-
int lz =__builtin_clzll(high);;
452+
uint32_t lz = RAPIDJSON_CLZLL(high);
452453
return p + 8 + (lz >> 3);
453454
}
454455
} else {
455-
int lz = __builtin_clzll(low);;
456+
uint32_t lz = RAPIDJSON_CLZLL(low);
456457
return p + (lz >> 3);
457458
}
458459
}
@@ -479,16 +480,16 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
479480

480481
x = vmvnq_u8(x); // Negate
481482
x = vrev64q_u8(x); // Rev in 64
482-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
483-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
483+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
484+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
484485

485486
if (low == 0) {
486487
if (high != 0) {
487-
int lz = __builtin_clzll(high);
488+
uint32_t lz = RAPIDJSON_CLZLL(high);
488489
return p + 8 + (lz >> 3);
489490
}
490491
} else {
491-
int lz = __builtin_clzll(low);
492+
uint32_t lz = RAPIDJSON_CLZLL(low);
492493
return p + (lz >> 3);
493494
}
494495
}
@@ -1244,19 +1245,19 @@ class GenericReader {
12441245
x = vorrq_u8(x, vcltq_u8(s, s3));
12451246

12461247
x = vrev64q_u8(x); // Rev in 64
1247-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1248-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1248+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1249+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
12491250

12501251
SizeType length = 0;
12511252
bool escaped = false;
12521253
if (low == 0) {
12531254
if (high != 0) {
1254-
unsigned lz = (unsigned)__builtin_clzll(high);;
1255+
uint32_t lz = RAPIDJSON_CLZLL(high);
12551256
length = 8 + (lz >> 3);
12561257
escaped = true;
12571258
}
12581259
} else {
1259-
unsigned lz = (unsigned)__builtin_clzll(low);;
1260+
uint32_t lz = RAPIDJSON_CLZLL(low);
12601261
length = lz >> 3;
12611262
escaped = true;
12621263
}
@@ -1314,19 +1315,19 @@ class GenericReader {
13141315
x = vorrq_u8(x, vcltq_u8(s, s3));
13151316

13161317
x = vrev64q_u8(x); // Rev in 64
1317-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1318-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1318+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1319+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
13191320

13201321
SizeType length = 0;
13211322
bool escaped = false;
13221323
if (low == 0) {
13231324
if (high != 0) {
1324-
unsigned lz = (unsigned)__builtin_clzll(high);
1325+
uint32_t lz = RAPIDJSON_CLZLL(high);
13251326
length = 8 + (lz >> 3);
13261327
escaped = true;
13271328
}
13281329
} else {
1329-
unsigned lz = (unsigned)__builtin_clzll(low);
1330+
uint32_t lz = RAPIDJSON_CLZLL(low);
13301331
length = lz >> 3;
13311332
escaped = true;
13321333
}
@@ -1370,17 +1371,17 @@ class GenericReader {
13701371
x = vorrq_u8(x, vcltq_u8(s, s3));
13711372

13721373
x = vrev64q_u8(x); // Rev in 64
1373-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1374-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1374+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1375+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
13751376

13761377
if (low == 0) {
13771378
if (high != 0) {
1378-
int lz = __builtin_clzll(high);
1379+
uint32_t lz = RAPIDJSON_CLZLL(high);
13791380
p += 8 + (lz >> 3);
13801381
break;
13811382
}
13821383
} else {
1383-
int lz = __builtin_clzll(low);
1384+
uint32_t lz = RAPIDJSON_CLZLL(low);
13841385
p += lz >> 3;
13851386
break;
13861387
}
@@ -1403,7 +1404,7 @@ class GenericReader {
14031404
RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
14041405
RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
14051406
RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
1406-
RAPIDJSON_FORCEINLINE void Push(char) {}
1407+
RAPIDJSON_FORCEINLINE void Push(char) {}
14071408

14081409
size_t Tell() { return is.Tell(); }
14091410
size_t Length() { return 0; }

include/rapidjson/writer.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define RAPIDJSON_WRITER_H_
1717

1818
#include "stream.h"
19+
#include "internal/clzll.h"
1920
#include "internal/meta.h"
2021
#include "internal/stack.h"
2122
#include "internal/strfunc.h"
@@ -226,7 +227,7 @@ class Writer {
226227
return Key(str.data(), SizeType(str.size()));
227228
}
228229
#endif
229-
230+
230231
bool EndObject(SizeType memberCount = 0) {
231232
(void)memberCount;
232233
RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
@@ -668,19 +669,19 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
668669
x = vorrq_u8(x, vcltq_u8(s, s3));
669670

670671
x = vrev64q_u8(x); // Rev in 64
671-
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
672-
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
672+
uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
673+
uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
673674

674675
SizeType len = 0;
675676
bool escaped = false;
676677
if (low == 0) {
677678
if (high != 0) {
678-
unsigned lz = (unsigned)__builtin_clzll(high);
679+
uint32_t lz = RAPIDJSON_CLZLL(high);
679680
len = 8 + (lz >> 3);
680681
escaped = true;
681682
}
682683
} else {
683-
unsigned lz = (unsigned)__builtin_clzll(low);
684+
uint32_t lz = RAPIDJSON_CLZLL(low);
684685
len = lz >> 3;
685686
escaped = true;
686687
}

0 commit comments

Comments
 (0)