Skip to content

Commit 74d9767

Browse files
perf: simplify call path
Optimize serialization performance
2 parents 4d3072e + 2ba6b16 commit 74d9767

File tree

7 files changed

+231
-191
lines changed

7 files changed

+231
-191
lines changed

src/Nino.Core/FastMap.cs

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Collections;
33
using System.Collections.Generic;
44
using System.Runtime.CompilerServices;
5+
using System.Runtime.InteropServices;
56

67
namespace Nino.Core
78
{
@@ -17,14 +18,20 @@ public sealed class FastMap<TKey, TValue> : IDisposable, IEnumerable<KeyValuePai
1718
private const int MaxKickCount = 16;
1819
private const int MinCapacity = 8;
1920

21+
// Optimized layout: Pack=1 eliminates padding, improving cache utilization
22+
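// Trade-off: potential unaligned access (acceptable on modern CPUs) for better memory density
// NOTE(review): per the LayoutKind.Sequential documentation, the attribute governs the managed layout only for blittable types; Entry holds a bool plus generic TKey/TValue fields, so the packing may be ignored at runtime - confirm with Unsafe.SizeOf<Entry>() before relying on the density claim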
23+
[StructLayout(LayoutKind.Sequential, Pack = 1)]
2024
private struct Entry
2125
{
2226
public uint HashCode;
27+
public bool IsOccupied;
2328
public TKey Key;
2429
public TValue Value;
25-
public bool IsOccupied;
2630
}
2731

32+
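// Cache the equality comparer to avoid repeated lookups in hot paths
// NOTE(review): recent .NET JITs can devirtualize direct EqualityComparer<TKey>.Default.Equals calls; going through an interface-typed static field may lose that optimization - benchmark on the target runtimes to confirm this is a net win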
33+
private static readonly IEqualityComparer<TKey> KeyComparer = EqualityComparer<TKey>.Default;
34+
2835
private Entry[] _table1;
2936
private Entry[] _table2;
3037
private int _capacity;
@@ -36,22 +43,27 @@ private struct Entry
3643
public int Capacity => _capacity;
3744
public bool IsCreated => _table1 != null;
3845

46+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
47+
private static int TransformHashCode(int hashCode)
48+
{
49+
return hashCode ^ (hashCode >> 16);
50+
}
51+
3952
public ref TValue this[in TKey key]
4053
{
4154
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4255
get
4356
{
44-
var hashCode = key.GetHashCode();
45-
hashCode ^= hashCode >> 16;
57+
var hashCode = TransformHashCode(key.GetHashCode());
4658

4759
var index1 = hashCode & _capacityMask;
4860
ref Entry entry1 = ref _table1[index1];
49-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
61+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
5062
return ref entry1.Value;
5163

5264
var index2 = (hashCode >> 8) & _capacityMask;
5365
ref Entry entry2 = ref _table2[index2];
54-
if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
66+
if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
5567
return ref entry2.Value;
5668
throw new KeyNotFoundException();
5769
}
@@ -147,7 +159,7 @@ public bool TryAdd(in TKey key, in TValue value)
147159
_version++;
148160
return true;
149161
}
150-
else if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
162+
else if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
151163
return false;
152164

153165
var index2 = (hashCode >> 8) & _capacityMask;
@@ -162,7 +174,7 @@ public bool TryAdd(in TKey key, in TValue value)
162174
_version++;
163175
return true;
164176
}
165-
else if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
177+
else if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
166178
return false;
167179

168180
bool res = CuckooInsert(hashCode, key, value, false);
@@ -188,7 +200,7 @@ private bool TryAddOrUpdate(in TKey key, in TValue value)
188200
_version++;
189201
return true;
190202
}
191-
else if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
203+
else if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
192204
{
193205
entry1.Value = value;
194206
_version++;
@@ -207,7 +219,7 @@ private bool TryAddOrUpdate(in TKey key, in TValue value)
207219
_version++;
208220
return true;
209221
}
210-
else if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
222+
else if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
211223
{
212224
entry2.Value = value;
213225
_version++;
@@ -242,7 +254,7 @@ private bool CuckooInsert(int hashCode, in TKey key, in TValue value, bool updat
242254
_count++;
243255
return true;
244256
}
245-
else if (updateIfExists && entry.HashCode == currentHashCode && EqualityComparer<TKey>.Default.Equals(entry.Key, currentKey))
257+
else if (updateIfExists && entry.HashCode == currentHashCode && KeyComparer.Equals(entry.Key, currentKey))
246258
{
247259
entry.Value = currentValue;
248260
return true;
@@ -266,7 +278,7 @@ private bool CuckooInsert(int hashCode, in TKey key, in TValue value, bool updat
266278
_count++;
267279
return true;
268280
}
269-
else if (updateIfExists && entry.HashCode == currentHashCode && EqualityComparer<TKey>.Default.Equals(entry.Key, currentKey))
281+
else if (updateIfExists && entry.HashCode == currentHashCode && KeyComparer.Equals(entry.Key, currentKey))
270282
{
271283
entry.Value = currentValue;
272284
return true;
@@ -291,7 +303,7 @@ public ref TValue GetValueRefOrAddDefault(TKey key, out bool exists)
291303

292304
var index1 = hashCode & _capacityMask;
293305
ref Entry entry1 = ref _table1[index1];
294-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
306+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
295307
{
296308
exists = true;
297309
return ref entry1.Value;
@@ -300,7 +312,7 @@ public ref TValue GetValueRefOrAddDefault(TKey key, out bool exists)
300312
var index2 = (hashCode >> 8) & _capacityMask;
301313

302314
ref Entry entry2 = ref _table2[index2];
303-
if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
315+
if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
304316
{
305317
exists = true;
306318
return ref entry2.Value;
@@ -344,15 +356,15 @@ public bool TryGetValue(in TKey key, out TValue value)
344356

345357
var index1 = hashCode & _capacityMask;
346358
ref Entry entry1 = ref _table1[index1];
347-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
359+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
348360
{
349361
value = entry1.Value;
350362
return true;
351363
}
352364

353365
var index2 = (hashCode >> 8) & _capacityMask;
354366
ref Entry entry2 = ref _table2[index2];
355-
if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
367+
if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
356368
{
357369
value = entry2.Value;
358370
return true;
@@ -370,12 +382,12 @@ public ref TValue GetValueRef(in TKey key)
370382

371383
var index1 = hashCode & _capacityMask;
372384
ref Entry entry1 = ref _table1[index1];
373-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
385+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
374386
return ref entry1.Value;
375387

376388
var index2 = (hashCode >> 8) & _capacityMask;
377389
ref Entry entry2 = ref _table2[index2];
378-
if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
390+
if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
379391
return ref entry2.Value;
380392

381393
throw new KeyNotFoundException();
@@ -389,12 +401,12 @@ public bool ContainsKey(in TKey key)
389401

390402
var index1 = hashCode & _capacityMask;
391403
ref Entry entry1 = ref _table1[index1];
392-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
404+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
393405
return true;
394406

395407
var index2 = (hashCode >> 8) & _capacityMask;
396408
ref Entry entry2 = ref _table2[index2];
397-
return entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key);
409+
return entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key);
398410
}
399411

400412
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -423,7 +435,7 @@ public bool Remove(TKey key)
423435

424436
var index1 = hashCode & _capacityMask;
425437
ref Entry entry1 = ref _table1[index1];
426-
if (entry1.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry1.Key, key))
438+
if (entry1.HashCode == hashCode && KeyComparer.Equals(entry1.Key, key))
427439
{
428440
entry1.IsOccupied = false;
429441
_count--;
@@ -433,7 +445,7 @@ public bool Remove(TKey key)
433445

434446
var index2 = (hashCode >> 8) & _capacityMask;
435447
ref Entry entry2 = ref _table2[index2];
436-
if (entry2.HashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry2.Key, key))
448+
if (entry2.HashCode == hashCode && KeyComparer.Equals(entry2.Key, key))
437449
{
438450
entry2.IsOccupied = false;
439451
_count--;

src/Nino.Core/NinoDeserializer.cs

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,37 @@ public static class CachedDeserializer<T>
192192
// ReSharper disable once StaticMemberInGenericType
193193
internal static readonly bool IsSimpleType = !IsReferenceOrContainsReferences && !HasBaseType;
194194

195+
// Inline cache entries - pack type ID and delegate together for better cache locality
196+
internal struct CacheEntry
197+
{
198+
public int TypeId;
199+
public DeserializeDelegate<T> Deserializer;
200+
}
201+
202+
internal struct CacheEntryRef
203+
{
204+
public int TypeId;
205+
public DeserializeDelegateRef<T> Deserializer;
206+
}
207+
208+
// Inline cache for polymorphic deserialization (4 entries per type, separate for out/ref)
209+
// Reduced from 8 to 4 for better cache locality with single array access
210+
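// Shared across threads without synchronization - races on cache updates are assumed to be benign
// NOTE(review): a cache miss stores TypeId and Deserializer as two separate writes, so a concurrent reader can observe a matching TypeId paired with a stale or null delegate - confirm this cannot invoke the wrong deserializer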
211+
internal static readonly CacheEntry[] Cache = new CacheEntry[]
212+
{
213+
new CacheEntry { TypeId = int.MinValue },
214+
new CacheEntry { TypeId = int.MinValue },
215+
new CacheEntry { TypeId = int.MinValue },
216+
new CacheEntry { TypeId = int.MinValue }
217+
};
218+
internal static readonly CacheEntryRef[] CacheRef = new CacheEntryRef[]
219+
{
220+
new CacheEntryRef { TypeId = int.MinValue },
221+
new CacheEntryRef { TypeId = int.MinValue },
222+
new CacheEntryRef { TypeId = int.MinValue },
223+
new CacheEntryRef { TypeId = int.MinValue }
224+
};
225+
195226
public static void SetDeserializer(int typeId, DeserializeDelegate<T> deserializer,
196227
DeserializeDelegateRef<T> deserializerRef, DeserializeDelegate<T> optimalDeserializer,
197228
DeserializeDelegateRef<T> optimalDeserializerRef)
@@ -389,18 +420,22 @@ public static void DeserializePolymorphic(out T value, ref Reader reader)
389420
return;
390421
}
391422

392-
// FAST PATH: Cache hit (optimized for monomorphic arrays)
393-
if (typeId == reader.CachedTypeId && reader.CachedDeserializer is DeserializeDelegate<T> cachedDeserializer)
423+
// Check 4-entry inline cache using bitwise AND indexing
424+
// Single struct array access for better cache locality (vs two separate arrays)
425+
int cacheSlot = (int)typeId & 3; // Faster than % 4 for power-of-2
426+
ref CacheEntry entry = ref Cache[cacheSlot];
427+
if (typeId == entry.TypeId)
394428
{
395-
cachedDeserializer(out value, ref reader);
429+
entry.Deserializer(out value, ref reader);
396430
return;
397431
}
398432

399-
// SLOW PATH: Full lookup in subtype map and update cache
433+
// Cache miss - look up in FastMap and update cache
400434
if (SubTypeDeserializers.TryGetValue(typeId, out var subTypeDeserializer))
401435
{
402-
reader.CachedTypeId = typeId;
403-
reader.CachedDeserializer = subTypeDeserializer;
436+
// Update the cache slot with both type ID and deserializer
437+
entry.TypeId = typeId;
438+
entry.Deserializer = subTypeDeserializer;
404439
subTypeDeserializer(out value, ref reader);
405440
return;
406441
}
@@ -457,19 +492,22 @@ public static void DeserializeRefPolymorphic(ref T value, ref Reader reader)
457492
return;
458493
}
459494

460-
// FAST PATH: Cache hit (optimized for monomorphic arrays)
461-
if (typeId == reader.CachedTypeIdRef &&
462-
reader.CachedDeserializerRef is DeserializeDelegateRef<T> cachedDeserializerRef)
495+
// Check 4-entry inline cache using bitwise AND indexing
496+
// Single struct array access for better cache locality (vs two separate arrays)
497+
int cacheSlotRef = (int)typeId & 3; // Faster than % 4 for power-of-2
498+
ref CacheEntryRef entryRef = ref CacheRef[cacheSlotRef];
499+
if (typeId == entryRef.TypeId)
463500
{
464-
cachedDeserializerRef(ref value, ref reader);
501+
entryRef.Deserializer(ref value, ref reader);
465502
return;
466503
}
467504

468-
// SLOW PATH: Full lookup in subtype map and update cache
505+
// Cache miss - look up in FastMap and update cache
469506
if (SubTypeDeserializerRefs.TryGetValue(typeId, out var subTypeDeserializer))
470507
{
471-
reader.CachedTypeIdRef = typeId;
472-
reader.CachedDeserializerRef = subTypeDeserializer;
508+
// Update the cache slot with both type ID and deserializer
509+
entryRef.TypeId = typeId;
510+
entryRef.Deserializer = subTypeDeserializer;
473511
subTypeDeserializer(ref value, ref reader);
474512
return;
475513
}

src/Nino.Core/NinoHelper.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ internal static class NinoHelper
1212
/// <summary>
1313
/// Gets the generated namespace for a type's assembly.
1414
/// This must match the logic in NinoTypeHelper.GetNamespace()
15+
/// Called only in error paths - intentionally not inlined to keep call sites small.
1516
/// </summary>
16-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
17+
[MethodImpl(MethodImplOptions.NoInlining)]
1718
internal static string GetGeneratedNamespace(Type type)
1819
{
1920
var assemblyName = type.Assembly.GetName().Name ?? string.Empty;

src/Nino.Core/NinoSerializer.cs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,18 @@ public static class CachedSerializer<T>
188188
// ReSharper disable once StaticMemberInGenericType
189189
internal static readonly bool IsSimpleType = !IsReferenceOrContainsReferences && !HasBaseType;
190190

191+
// Inline cache entry - packs type handle and delegate together for better cache locality
192+
internal struct CacheEntry
193+
{
194+
public IntPtr TypeHandle;
195+
public SerializeDelegate<T> Serializer;
196+
}
197+
198+
// Inline cache for polymorphic serialization (4 entries per type)
199+
// Reduced from 8 to 4 for better cache locality with single array access
200+
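// Shared across threads without synchronization - races on cache updates are assumed to be benign
// NOTE(review): a cache miss stores TypeHandle and Serializer as two separate writes, so a concurrent reader can observe a matching TypeHandle paired with a stale or null delegate - confirm this cannot invoke the wrong serializer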
201+
internal static readonly CacheEntry[] Cache = new CacheEntry[4];
202+
191203
public static void SetSerializer(SerializeDelegate<T> serializer)
192204
{
193205
_serializer = serializer;
@@ -304,17 +316,22 @@ public static unsafe void SerializePolymorphic(T val, ref Writer writer)
304316
return;
305317
}
306318

307-
if (actualTypeHandle == writer.CachedTypeHandle &&
308-
writer.CachedSerializer is SerializeDelegate<T> cachedSer)
319+
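// Check 4-entry inline cache using bitwise AND indexing
// NOTE(review): if actualTypeHandle is a pointer-sized runtime type handle, its low two bits are presumably alignment zeros, so "& 3" would always select slot 0; the explicit IntPtr-to-int conversion is also documented to throw OverflowException in 64-bit processes when the value does not fit - verify both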
320+
// Single struct array access for better cache locality (vs two separate arrays)
321+
int cacheSlot = (int)actualTypeHandle & 3; // Faster than % 4 for power-of-2
322+
ref CacheEntry entry = ref Cache[cacheSlot];
323+
if (actualTypeHandle == entry.TypeHandle)
309324
{
310-
cachedSer(val, ref writer);
325+
entry.Serializer(val, ref writer);
311326
return;
312327
}
313328

329+
// Cache miss - look up in FastMap and update cache
314330
if (SubTypeSerializers.TryGetValue(actualTypeHandle, out var subTypeSerializer))
315331
{
316-
writer.CachedTypeHandle = actualTypeHandle;
317-
writer.CachedSerializer = subTypeSerializer;
332+
// Update the cache slot with both type handle and serializer
333+
entry.TypeHandle = actualTypeHandle;
334+
entry.Serializer = subTypeSerializer;
318335
subTypeSerializer(val, ref writer);
319336
return;
320337
}

0 commit comments

Comments
 (0)