Skip to content

Commit 0b78790

Browse files
committed
Improve deserialization performance by > 40% in the normal case and > 25% in the worst case.
Profiling showed that during deserialization, significant time was being spent in the following functions: 1. System.Type.IsAssignableFrom(class System.Type) 14.35% 2. MongoDB.Bson.IO.BsonBinaryReader.ReadBsonType() 11.46% 3. MongoDB.Bson.Serialization.BsonClassMap.GetMemberMapIndexForElement(string) 7.68% This change addresses all 3 items: 1. Fixed by placing a quick type check before the hot-path IsAssignableFrom calls. In most cases the types are equivalent, which eliminates the call to IsAssignableFrom. 2/3. Most of the time in ReadBsonType is due to a call to ReadCString. All of the calls to GetMemberMapIndexForElement use the value returned by ReadCString. To improve this, I've implemented a trie (http://en.wikipedia.org/wiki/Trie) optimized for mapping input byte sequences directly to result values specified by the caller. In the case of BsonClassMapSerializer, the trie directly maps the input binary UTF8 representation of element names to member map indexes. This completely eliminates all dictionary lookups, UTF8 string conversions, and intermediate memory allocations, and does so in O(m) time where m is the length of the input string. Contrast this with the existing implementation where there are 5 (!!!) O(m) operations (1. Find the null byte, 2. Count the number of UTF8 characters, 3. Decode the UTF8 characters into a string, 4. Hash the string, 5. Compare the string to the key in the hashtable bucket). Additionally, I’ve factored the trie so that any deserializer can use one to switch immediately to the correct state when decoding. I’ve also implemented two other minor improvements: 1. Replaced the lookup-table-based GetLeastSignificantBit implementation in BsonClassMapSerializer with a binary-search-based one. This is significantly faster on .NET because array accesses require an indirection and a managed bounds check. In my own side-by-side testing, the binary-search-based one is 40-50% faster. 2. 
Added the readonly modifier to _declaredMemberMaps in BsonClassMap and cleaned up the initialization of this value. This is a minor improvement, but the field is now consistent with _allMemberMaps and fewer copies happen during construction.
1 parent f74ddc3 commit 0b78790

16 files changed

+885
-170
lines changed

Bson/Bson.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
<Compile Include="IO\BsonDocumentReaderSettings.cs" />
8787
<Compile Include="IO\BsonDocumentWriterSettings.cs" />
8888
<Compile Include="IO\BsonReaderSettings.cs" />
89+
<Compile Include="IO\BsonTrie.cs" />
8990
<Compile Include="IO\BsonWriterSettings.cs" />
9091
<Compile Include="ObjectModel\GuidRepresentation.cs" />
9192
<Compile Include="ObjectModel\GuidConverter.cs" />

Bson/IO/BsonBinaryReader.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ public override bool ReadBoolean()
161161
/// Reads a BsonType from the reader.
162162
/// </summary>
163163
/// <returns>A BsonType.</returns>
164-
public override BsonType ReadBsonType()
164+
public override BsonType ReadBsonType(BsonTrie bsonTrie)
165165
{
166166
if (Disposed) { ThrowObjectDisposedException(); }
167167
if (State == BsonReaderState.Initial || State == BsonReaderState.Done || State == BsonReaderState.ScopeDocument)
@@ -204,7 +204,7 @@ public override BsonType ReadBsonType()
204204
break;
205205
case ContextType.Document:
206206
case ContextType.ScopeDocument:
207-
CurrentName = _buffer.ReadCString();
207+
CurrentName = _buffer.ReadCString(bsonTrie);
208208
State = BsonReaderState.Name;
209209
break;
210210
default:
@@ -417,8 +417,8 @@ public override void ReadRegularExpression(out string pattern, out string option
417417
{
418418
if (Disposed) { ThrowObjectDisposedException(); }
419419
VerifyBsonType("ReadRegularExpression", BsonType.RegularExpression);
420-
pattern = _buffer.ReadCString();
421-
options = _buffer.ReadCString();
420+
pattern = (string)_buffer.ReadCString(null);
421+
options = (string)_buffer.ReadCString(null);
422422
State = GetNextState();
423423
}
424424

Bson/IO/BsonBinaryReaderBookmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class BsonBinaryReaderBookmark : BsonReaderBookmark
3333
internal BsonBinaryReaderBookmark(
3434
BsonReaderState state,
3535
BsonType currentBsonType,
36-
string currentName,
36+
object currentName,
3737
BsonBinaryReaderContext context,
3838
int position)
3939
: base(state, currentBsonType, currentName)

Bson/IO/BsonBuffer.cs

Lines changed: 96 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,11 @@ public int Position
144144
// private static methods
145145
private static string[] BuildAsciiStringTable()
146146
{
147-
var asciiStringTable = new string[128];
147+
var asciiStringTable = new string[95]; // 95 printable ASCII characters; first 32 are control characters; 127 is the delete character
148148

149-
for (int i = 0; i < 128; ++i)
149+
for (var i = 0; i < asciiStringTable.Length; ++i)
150150
{
151-
asciiStringTable[i] = new string((char)i, 1);
151+
asciiStringTable[i] = new string((char)(i + 32), 1);
152152
}
153153

154154
return asciiStringTable;
@@ -533,31 +533,7 @@ public string ReadString()
533533
{
534534
throw new FileFormatException("String is missing null terminator.");
535535
}
536-
537-
switch (length)
538-
{
539-
// special case empty strings
540-
case 1:
541-
value = string.Empty;
542-
break;
543-
544-
// special case single character strings
545-
case 2:
546-
var c1 = _chunk[_chunkOffset];
547-
if (c1 < 128)
548-
{
549-
value = __asciiStringTable[c1];
550-
}
551-
else
552-
{
553-
value = __utf8Encoding.GetString(_chunk, _chunkOffset, 1); // let GetString throw a DecoderFallbackException
554-
}
555-
break;
556-
557-
default:
558-
value = __utf8Encoding.GetString(_chunk, _chunkOffset, length - 1); // don't decode the null terminator
559-
break;
560-
}
536+
value = GetSingleChunkString(length - 1); // don't decode the null terminator
561537
Position += length;
562538
}
563539
else
@@ -577,8 +553,9 @@ public string ReadString()
577553
/// <summary>
578554
/// Reads a BSON CString from the reader (a null terminated string).
579555
/// </summary>
580-
/// <returns>A String.</returns>
581-
public string ReadCString()
556+
/// <param name="bsonTrie">An optional BsonTrie to use during decoding.</param>
557+
/// <returns>A value decoded using the optional BsonTrie or a String if no BsonTrie was specified or no value could be decoded using the BsonTrie.</returns>
558+
public object ReadCString(BsonTrie bsonTrie)
582559
{
583560
if (_disposed) { throw new ObjectDisposedException("BsonBuffer"); }
584561
// optimize for the case where the null terminator is on the same chunk
@@ -592,44 +569,20 @@ public string ReadCString()
592569
partialCount = _length - _position; // populated part of last chunk
593570
}
594571

595-
if (partialCount > 0)
572+
var bsonTrieNode = bsonTrie != null ? bsonTrie.Root : null;
573+
var index = FindNull(
574+
bsonTrie,
575+
ref bsonTrieNode,
576+
_chunk,
577+
_chunkOffset,
578+
partialCount);
579+
if (index != -1)
596580
{
597-
var c1 = _chunk[_chunkOffset];
598-
599-
// special case empty strings
600-
if (c1 == 0)
601-
{
602-
Position += 1;
603-
return string.Empty;
604-
}
605-
606-
if (partialCount > 1)
607-
{
608-
// special case single character strings
609-
if (_chunk[_chunkOffset + 1] == 0)
610-
{
611-
string value;
612-
if (c1 < 128)
613-
{
614-
value = __asciiStringTable[c1];
615-
}
616-
else
617-
{
618-
value = __utf8Encoding.GetString(_chunk, _chunkOffset, 1); // let GetString throw a DecoderFallbackException
619-
}
620-
Position += 2;
621-
return value;
622-
}
623-
624-
var index = Array.IndexOf<byte>(_chunk, 0, _chunkOffset + 2, partialCount - 2);
625-
if (index != -1)
626-
{
627-
var stringLength = index - _chunkOffset;
628-
var value = __utf8Encoding.GetString(_chunk, _chunkOffset, stringLength);
629-
Position += stringLength + 1;
630-
return value;
631-
}
632-
}
581+
var stringLength = index - _chunkOffset;
582+
var value = bsonTrieNode != null && bsonTrieNode.HasValue ?
583+
bsonTrieNode.Value : GetSingleChunkString(stringLength);
584+
Position += stringLength + 1;
585+
return value;
633586
}
634587

635588
// the null terminator is not on the same chunk so keep looking starting with the next chunk
@@ -646,13 +599,27 @@ public string ReadCString()
646599
{
647600
partialCount = _length - localPosition; // populated part of last chunk
648601
}
649-
var index = Array.IndexOf<byte>(localChunk, 0, 0, partialCount);
602+
index = FindNull(
603+
bsonTrie,
604+
ref bsonTrieNode,
605+
localChunk,
606+
0,
607+
partialCount);
650608
if (index != -1)
651609
{
652610
localPosition += index;
653611
var stringLength = localPosition - _position;
654-
var value = __utf8Encoding.GetString(ReadBytes(stringLength)); // ReadBytes advances over string
655-
Position += 1; // skip over null byte at end
612+
object value;
613+
if (bsonTrieNode != null && bsonTrieNode.HasValue)
614+
{
615+
value = bsonTrieNode.Value;
616+
Position += stringLength + 1;
617+
}
618+
else
619+
{
620+
value = __utf8Encoding.GetString(ReadBytes(stringLength)); // ReadBytes advances over string
621+
Position += 1; // skip over null byte at end
622+
}
656623
return value;
657624
}
658625
localChunkIndex++;
@@ -1009,5 +976,64 @@ private void EnsureSpaceAvailable(int needed)
1009976
}
1010977
}
1011978
}
979+
980+
private static int FindNull(
981+
BsonTrie bsonTrie,
982+
ref BsonTrieNode bsonTrieNode,
983+
byte[] buffer,
984+
int index,
985+
int count)
986+
{
987+
while (count > 0)
988+
{
989+
if (bsonTrieNode == null)
990+
{
991+
return Array.IndexOf<byte>(buffer, 0, index, count);
992+
}
993+
994+
var c = buffer[index];
995+
996+
if (c == 0)
997+
{
998+
if (!bsonTrieNode.HasValue)
999+
{
1000+
bsonTrieNode = null;
1001+
}
1002+
1003+
return index;
1004+
}
1005+
1006+
bsonTrieNode = bsonTrie.GetNext(
1007+
bsonTrieNode,
1008+
c);
1009+
1010+
++index;
1011+
1012+
--count;
1013+
}
1014+
1015+
return -1;
1016+
}
1017+
1018+
private string GetSingleChunkString(int length)
1019+
{
1020+
switch (length)
1021+
{
1022+
// special case empty strings
1023+
case 0:
1024+
return string.Empty;
1025+
1026+
// special case single character strings
1027+
case 1:
1028+
int tableIndex = _chunk[_chunkOffset] - 32;
1029+
if ((uint)tableIndex < __asciiStringTable.Length)
1030+
{
1031+
return __asciiStringTable[tableIndex];
1032+
}
1033+
break;
1034+
}
1035+
1036+
return __utf8Encoding.GetString(_chunk, _chunkOffset, length);
1037+
}
10121038
}
10131039
}

Bson/IO/BsonDocumentReader.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public override bool ReadBoolean()
104104
/// Reads a BsonType from the reader.
105105
/// </summary>
106106
/// <returns>A BsonType.</returns>
107-
public override BsonType ReadBsonType()
107+
public override BsonType ReadBsonType(BsonTrie bsonTrie)
108108
{
109109
if (Disposed) { ThrowObjectDisposedException(); }
110110
if (State == BsonReaderState.Initial || State == BsonReaderState.ScopeDocument)
@@ -137,7 +137,16 @@ public override BsonType ReadBsonType()
137137
State = BsonReaderState.EndOfDocument;
138138
return BsonType.EndOfDocument;
139139
}
140-
CurrentName = currentElement.Name;
140+
object currentName;
141+
if (bsonTrie != null &&
142+
bsonTrie.TryGetValue(currentElement.Name, out currentName))
143+
{
144+
CurrentName = currentName;
145+
}
146+
else
147+
{
148+
CurrentName = currentElement.Name;
149+
}
141150
_currentValue = currentElement.Value;
142151
State = BsonReaderState.Name;
143152
break;

Bson/IO/BsonDocumentReaderBookmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class BsonDocumentReaderBookmark : BsonReaderBookmark
3333
internal BsonDocumentReaderBookmark(
3434
BsonReaderState state,
3535
BsonType currentBsonType,
36-
string currentName,
36+
object currentName,
3737
BsonDocumentReaderContext context,
3838
BsonValue currentValue)
3939
: base(state, currentBsonType, currentName)

Bson/IO/BsonReader.cs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public abstract class BsonReader : IDisposable
3131
private BsonReaderSettings _settings;
3232
private BsonReaderState _state;
3333
private BsonType _currentBsonType;
34-
private string _currentName;
34+
private object _currentName;
3535

3636
// constructors
3737
/// <summary>
@@ -54,6 +54,15 @@ public BsonType CurrentBsonType
5454
protected set { _currentBsonType = value; }
5555
}
5656

57+
/// <summary>
58+
/// Gets the current name.
59+
/// </summary>
60+
public object CurrentName
61+
{
62+
get { return _currentName; }
63+
protected set { _currentName = value; }
64+
}
65+
5766
/// <summary>
5867
/// Gets the settings of the reader.
5968
/// </summary>
@@ -72,15 +81,6 @@ public BsonReaderState State
7281
}
7382

7483
// protected properties
75-
/// <summary>
76-
/// Gets the current name.
77-
/// </summary>
78-
protected string CurrentName
79-
{
80-
get { return _currentName; }
81-
set { _currentName = value; }
82-
}
83-
8484
/// <summary>
8585
/// Gets whether the BsonReader has been disposed.
8686
/// </summary>
@@ -364,11 +364,17 @@ public bool ReadBoolean(string name)
364364
return ReadBoolean();
365365
}
366366

367+
/// <returns>A BsonType.</returns>
368+
public BsonType ReadBsonType()
369+
{
370+
return this.ReadBsonType(null);
371+
}
372+
367373
/// <summary>
368374
/// Reads a BsonType from the reader.
369375
/// </summary>
370376
/// <returns>A BsonType.</returns>
371-
public abstract BsonType ReadBsonType();
377+
public abstract BsonType ReadBsonType(BsonTrie bsonTrie);
372378

373379
/// <summary>
374380
/// Reads a BSON DateTime from the reader.
@@ -529,7 +535,7 @@ public string ReadName()
529535
}
530536

531537
_state = BsonReaderState.Value;
532-
return _currentName;
538+
return _currentName as string;
533539
}
534540

535541
/// <summary>

Bson/IO/BsonReaderBookmark.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public abstract class BsonReaderBookmark
2828
// private fields
2929
private BsonReaderState _state;
3030
private BsonType _currentBsonType;
31-
private string _currentName;
31+
private object _currentName;
3232

3333
// constructors
3434
/// <summary>
@@ -37,7 +37,7 @@ public abstract class BsonReaderBookmark
3737
/// <param name="state">The state of the reader.</param>
3838
/// <param name="currentBsonType">The current BSON type.</param>
3939
/// <param name="currentName">The name of the current element.</param>
40-
protected BsonReaderBookmark(BsonReaderState state, BsonType currentBsonType, string currentName)
40+
protected BsonReaderBookmark(BsonReaderState state, BsonType currentBsonType, object currentName)
4141
{
4242
_state = state;
4343
_currentBsonType = currentBsonType;
@@ -64,7 +64,7 @@ public BsonType CurrentBsonType
6464
/// <summary>
6565
/// Gets the name of the current element.
6666
/// </summary>
67-
public string CurrentName
67+
public object CurrentName
6868
{
6969
get { return _currentName; }
7070
}

0 commit comments

Comments
 (0)