Skip to content

Commit bb5250a

Browse files
committed
[MERGE #5372 @VSadov] Improving performance of SCA Deserialization
Merge pull request #5372 from VSadov:SCAperf01 Some low-hanging fruit related to SCA deserialization perf. There are also some "Full" changes reducing transient garbage in the cloner. Related to OS:#17883520 Not quite fixing the entire reported perf gap, but should be narrowing it by 10%-20%
2 parents 0269d7f + 944f30a commit bb5250a

File tree

4 files changed

+71
-29
lines changed

4 files changed

+71
-29
lines changed

lib/Common/DataStructures/Comparer.h

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ struct DefaultComparer<size_t>
6868
#ifdef TARGET_64
6969
// For 64 bits we want all 64 bits of the pointer to be represented in the hash code.
7070
uint32 hi = ((UINT_PTR) i >> 32);
71-
uint32 lo = (uint32) (i & 0xFFFFFFFF);
71+
uint32 lo = (uint32)i;
7272
hash_t hash = hi ^ lo;
7373
#else
7474
hash_t hash = i;
@@ -112,19 +112,15 @@ struct RecyclerPointerComparer
112112
}
113113
};
114114

115-
// FNV-1a hash -> https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
116-
// #define CC_HASH_OFFSET_VALUE 2166136261
117-
// #define CC_HASH_LOGIC(hash, byte) \
118-
// hash ^= byte; \
119-
// hash *= 16777619
120-
121-
// previous hash function.
122-
// TODO: hash function below is bad for key distribution.
123-
// FNV-1a above results better but expensive for lookups in small data sets.
124-
#define CC_HASH_OFFSET_VALUE 0
125-
#define CC_HASH_LOGIC(hash, byte) \
126-
hash = _rotl(hash, 7); \
127-
hash ^= byte;
115+
// TODO: FNV is a proven hash especially for short strings, which is common case here.
116+
// Still. it may be worth to consider a more recent block-based hash.
117+
// They tend to be faster, but it need to be examined against typical workloads.
118+
//
119+
// FNV-1a hash -> https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
120+
#define CC_HASH_OFFSET_VALUE 2166136261
121+
#define CC_HASH_LOGIC(hash, byte) \
122+
hash ^= byte; \
123+
hash *= 16777619
128124

129125
template <>
130126
struct DefaultComparer<GUID>

lib/Common/DataStructures/SizePolicy.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ struct PowerOf2Policy
3232
inline static hash_t GetBucket(hash_t hashCode, uint size, int modFunctionIndex)
3333
{
3434
AssertMsg(Math::IsPow2(size) && size > 1, "Size is not a power of 2.");
35+
36+
// we often deal with keys that differ in higher bits only, so smudge the entropy down a little
37+
// we do not need a perfect avalanche here, since this is for a hashtable lookup
38+
// the following is sufficient and reasonably cheap
39+
hashCode ^= (uint)hashCode >> 15;
40+
hashCode ^= (uint)hashCode >> 7;
41+
3542
hash_t targetBucket = hashCode & (size-1);
3643
return targetBucket;
3744
}

lib/Runtime/Base/ThreadContext.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ class ThreadContext sealed :
461461
};
462462

463463
public:
464-
typedef JsUtil::BaseHashSet<const Js::PropertyRecord *, HeapAllocator, PrimeSizePolicy, const Js::PropertyRecord *,
464+
typedef JsUtil::BaseHashSet<const Js::PropertyRecord *, HeapAllocator, PowerOf2SizePolicy, const Js::PropertyRecord *,
465465
Js::PropertyRecordStringHashComparer, JsUtil::SimpleHashedEntry, JsUtil::AsymetricResizeLock> PropertyMap;
466466
PropertyMap * propertyMap;
467467

lib/Runtime/Types/TypePath.h

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ namespace Js
88
{
99
class TinyDictionary
1010
{
11-
static const int PowerOf2_BUCKETS = 8;
11+
static const int PowerOf2_BUCKETS = 16;
1212
static const int BUCKETS_DWORDS = PowerOf2_BUCKETS / sizeof(DWORD);
1313
static const byte NIL = 0xff;
1414

@@ -19,37 +19,73 @@ namespace Js
1919
TinyDictionary()
2020
{
2121
CompileAssert(BUCKETS_DWORDS * sizeof(DWORD) == PowerOf2_BUCKETS);
22-
CompileAssert(BUCKETS_DWORDS == 2);
22+
CompileAssert(BUCKETS_DWORDS == 4);
2323
DWORD* init = bucketsData;
24-
init[0] = init[1] = 0xffffffff;
24+
init[0] = init[1] = init[2] = init[3] =0xffffffff;
25+
}
26+
27+
uint32 ReduceKeyToIndex(PropertyId key)
28+
{
29+
// we use 4-bit bucket index, but we often have keys that are larger.
30+
// use Fibonacci hash to reduce the possibility of collisions
31+
#if TARGET_64
32+
return (key * 11400714819323198485llu) >> 60;
33+
#else
34+
return (key * 2654435769ul) >> 28;
35+
#endif
2536
}
2637

2738
void Add(PropertyId key, byte value)
2839
{
40+
Assert(value < 128);
41+
2942
byte* buckets = reinterpret_cast<byte*>(bucketsData);
30-
uint32 bucketIndex = key & (PowerOf2_BUCKETS - 1);
43+
uint32 bucketIndex = ReduceKeyToIndex(key);
3144

3245
byte i = buckets[bucketIndex];
33-
buckets[bucketIndex] = value;
34-
next[value] = i;
46+
if (i == NIL)
47+
{
48+
// set the highest bit to mark the value as the last in the chain
49+
buckets[bucketIndex] = value | 128;
50+
}
51+
else
52+
{
53+
buckets[bucketIndex] = value;
54+
next[value] = i;
55+
}
3556
}
3657

3758
// Template shared with diagnostics
3859
template <class Data>
3960
inline bool TryGetValue(PropertyId key, PropertyIndex* index, const Data& data)
4061
{
4162
byte* buckets = reinterpret_cast<byte*>(bucketsData);
42-
uint32 bucketIndex = key & (PowerOf2_BUCKETS - 1);
63+
uint32 bucketIndex = ReduceKeyToIndex(key);
4364

44-
for (byte i = buckets[bucketIndex] ; i != NIL ; i = next[i])
65+
byte i = buckets[bucketIndex];
66+
if (i != NIL)
4567
{
46-
if (data[i]->GetPropertyId()== key)
47-
{
48-
*index = i;
49-
return true;
68+
for(;;)
69+
{
70+
byte idx = i & 127; // strip the sentinel bit
71+
72+
if (data[idx]->GetPropertyId() == key)
73+
{
74+
*index = idx;
75+
return true;
76+
}
77+
78+
if (i & 128)
79+
{
80+
// this was the last value in the chain
81+
break;
82+
}
83+
84+
Assert(idx != (next[idx] & 127));
85+
i = next[idx];
5086
}
51-
Assert(i != next[i]);
5287
}
88+
5389
return false;
5490
}
5591
};
@@ -75,7 +111,10 @@ namespace Js
75111
#define TYPE_PATH_ALLOC_GRANULARITY_GAP 3
76112
#endif
77113
#endif
78-
// Although we can allocate 2 more, this will put struct Data into another bucket. Just waste some slot in that case for 32-bit
114+
// Although we can allocate 2 more,
115+
// TinyDictionary can hold only up to 128 items (see TinyDictionary::Add),
116+
// Besides 128+ would put struct Data into another bucket.
117+
// Just waste some slot in that case for 32-bit
79118
static const uint MaxPathTypeHandlerLength = 128;
80119
static const uint InitialTypePathSize = 16 + TYPE_PATH_ALLOC_GRANULARITY_GAP;
81120

0 commit comments

Comments
 (0)