Skip to content

Commit 6c270e2

Browse files
author
rstam
committed
Some major refactoring of pull request 113: modify ReadCString to navigate a trie at the same time it is searching for the terminating null byte. The matching node in the trie (if found) contains the information needed to deserialize an element, thus avoiding a dictionary lookup.
1 parent adcaff5 commit 6c270e2

15 files changed

+344
-624
lines changed

Bson/IO/BsonBinaryReader.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,15 @@ public override bool ReadBoolean()
160160
/// <summary>
161161
/// Reads a BsonType from the reader.
162162
/// </summary>
163+
/// <param name="bsonTrie">An optional trie to search for a value that matches the next element name.</param>
164+
/// <param name="found">Set to true if a matching value was found in the trie.</param>
165+
/// <param name="value">Set to the matching value found in the trie or default(TValue) if no matching value was found.</param>
163166
/// <returns>A BsonType.</returns>
164-
public override BsonType ReadBsonType(BsonTrie bsonTrie)
167+
public override BsonType ReadBsonType<TValue>(BsonTrie<TValue> bsonTrie, out bool found, out TValue value)
165168
{
166169
if (Disposed) { ThrowObjectDisposedException(); }
170+
found = false;
171+
value = default(TValue);
167172
if (State == BsonReaderState.Initial || State == BsonReaderState.Done || State == BsonReaderState.ScopeDocument)
168173
{
169174
// there is an implied type of Document for the top level and for scope documents
@@ -204,7 +209,7 @@ public override BsonType ReadBsonType(BsonTrie bsonTrie)
204209
break;
205210
case ContextType.Document:
206211
case ContextType.ScopeDocument:
207-
CurrentName = _buffer.ReadCString(bsonTrie);
212+
CurrentName = _buffer.ReadCString(bsonTrie, out found, out value);
208213
State = BsonReaderState.Name;
209214
break;
210215
default:
@@ -417,8 +422,8 @@ public override void ReadRegularExpression(out string pattern, out string option
417422
{
418423
if (Disposed) { ThrowObjectDisposedException(); }
419424
VerifyBsonType("ReadRegularExpression", BsonType.RegularExpression);
420-
pattern = (string)_buffer.ReadCString(null);
421-
options = (string)_buffer.ReadCString(null);
425+
pattern = _buffer.ReadCString();
426+
options = _buffer.ReadCString();
422427
State = GetNextState();
423428
}
424429

Bson/IO/BsonBinaryReaderBookmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class BsonBinaryReaderBookmark : BsonReaderBookmark
3333
internal BsonBinaryReaderBookmark(
3434
BsonReaderState state,
3535
BsonType currentBsonType,
36-
object currentName,
36+
string currentName,
3737
BsonBinaryReaderContext context,
3838
int position)
3939
: base(state, currentBsonType, currentName)

Bson/IO/BsonBuffer.cs

Lines changed: 69 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,11 @@ public int Position
144144
// private static methods
145145
private static string[] BuildAsciiStringTable()
146146
{
147-
var asciiStringTable = new string[95]; // 95 printable ASCII characters; first 32 are control characters; 127 is the delete character
147+
var asciiStringTable = new string[128];
148148

149-
for (var i = 0; i < asciiStringTable.Length; ++i)
149+
for (int i = 0; i < 128; ++i)
150150
{
151-
asciiStringTable[i] = new string((char)(i + 32), 1);
151+
asciiStringTable[i] = new string((char)i, 1);
152152
}
153153

154154
return asciiStringTable;
@@ -533,7 +533,7 @@ public string ReadString()
533533
{
534534
throw new FileFormatException("String is missing null terminator.");
535535
}
536-
value = GetSingleChunkString(length - 1); // don't decode the null terminator
536+
value = DecodeUtf8String(_chunk, _chunkOffset, length - 1); // don't decode the null terminator
537537
Position += length;
538538
}
539539
else
@@ -550,14 +550,29 @@ public string ReadString()
550550
return value;
551551
}
552552

553+
/// <summary>
554+
/// Reads a BSON CString from the reader (a null terminated string).
555+
/// </summary>
556+
/// <returns>A string.</returns>
557+
// Convenience overload: reads a CString without consulting a trie by delegating to the generic overload.
public string ReadCString()
558+
{
559+
// no trie is supplied, so both out results below are throwaway locals
bool found;
560+
// declaring the out argument as object is what lets the compiler infer TValue = object (null carries no type)
object value;
561+
return ReadCString(null, out found, out value);
562+
}
563+
553564
/// <summary>
554565
/// Reads a BSON CString from the reader (a null terminated string).
555566
/// </summary>
556567
/// <param name="bsonTrie">An optional BsonTrie to use during decoding.</param>
557-
/// <returns>A value decoded using the optional BsonTrie or a String if no BsonTrie was specified or no value could be decoded using the BsonTrie.</returns>
558-
public object ReadCString(BsonTrie bsonTrie)
568+
/// <param name="found">Set to true if the string was found in the trie.</param>
569+
/// <param name="value">Set to the value found in the trie; otherwise, default(TValue).</param>
570+
/// <returns>A string.</returns>
571+
public string ReadCString<TValue>(BsonTrie<TValue> bsonTrie, out bool found, out TValue value)
559572
{
560573
if (_disposed) { throw new ObjectDisposedException("BsonBuffer"); }
574+
found = false;
575+
value = default(TValue);
561576
// optimize for the case where the null terminator is on the same chunk
562577
int partialCount;
563578
if (_chunkIndex < _chunks.Count - 1)
@@ -570,19 +585,23 @@ public object ReadCString(BsonTrie bsonTrie)
570585
}
571586

572587
var bsonTrieNode = bsonTrie != null ? bsonTrie.Root : null;
573-
var index = FindNull(
574-
bsonTrie,
575-
ref bsonTrieNode,
576-
_chunk,
577-
_chunkOffset,
578-
partialCount);
588+
var index = IndexOfNull(_chunk, _chunkOffset, partialCount, ref bsonTrieNode);
579589
if (index != -1)
580590
{
581591
var stringLength = index - _chunkOffset;
582-
var value = bsonTrieNode != null && bsonTrieNode.HasValue ?
583-
bsonTrieNode.Value : GetSingleChunkString(stringLength);
592+
string cstring;
593+
if (bsonTrieNode != null && bsonTrieNode.HasValue)
594+
{
595+
cstring = bsonTrieNode.ElementName;
596+
value = bsonTrieNode.Value;
597+
found = true;
598+
}
599+
else
600+
{
601+
cstring = DecodeUtf8String(_chunk, _chunkOffset, stringLength);
602+
}
584603
Position += stringLength + 1;
585-
return value;
604+
return cstring;
586605
}
587606

588607
// the null terminator is not on the same chunk so keep looking starting with the next chunk
@@ -599,28 +618,25 @@ public object ReadCString(BsonTrie bsonTrie)
599618
{
600619
partialCount = _length - localPosition; // populated part of last chunk
601620
}
602-
index = FindNull(
603-
bsonTrie,
604-
ref bsonTrieNode,
605-
localChunk,
606-
0,
607-
partialCount);
621+
index = IndexOfNull(localChunk, 0, partialCount, ref bsonTrieNode);
608622
if (index != -1)
609623
{
610624
localPosition += index;
611625
var stringLength = localPosition - _position;
612-
object value;
626+
string cstring;
613627
if (bsonTrieNode != null && bsonTrieNode.HasValue)
614628
{
629+
cstring = bsonTrieNode.ElementName;
615630
value = bsonTrieNode.Value;
631+
found = true;
616632
Position += stringLength + 1;
617633
}
618634
else
619635
{
620-
value = __utf8Encoding.GetString(ReadBytes(stringLength)); // ReadBytes advances over string
636+
cstring = __utf8Encoding.GetString(ReadBytes(stringLength)); // ReadBytes advances over string
621637
Position += 1; // skip over null byte at end
622638
}
623-
return value;
639+
return cstring;
624640
}
625641
localChunkIndex++;
626642
localPosition += __chunkSize;
@@ -945,6 +961,27 @@ public void WriteZero()
945961
}
946962

947963
// private methods
964+
/// <summary>
/// Decodes count bytes of buffer, starting at index, into a string, with shortcuts for
/// zero-length and single-byte inputs.
/// </summary>
/// <param name="buffer">The byte array that holds the encoded string.</param>
/// <param name="index">The offset in buffer of the first byte to decode.</param>
/// <param name="count">The number of bytes to decode (callers exclude the null terminator).</param>
/// <returns>The decoded string.</returns>
private string DecodeUtf8String(byte[] buffer, int index, int count)
965+
{
966+
switch (count)
967+
{
968+
// special case empty strings
969+
case 0:
970+
return "";
971+
972+
// special case single character strings
973+
case 1:
974+
// a single byte is looked up in the string table rather than decoded
// NOTE(review): presumably __asciiStringTable is the 128-entry table from BuildAsciiStringTable,
// so this covers exactly the single-byte (ASCII) range — confirm
var byte1 = (int)buffer[index];
975+
if (byte1 < __asciiStringTable.Length)
976+
{
977+
return __asciiStringTable[byte1];
978+
}
979+
// byte value is outside the table: leave the switch and use the general decoder below
break;
980+
}
981+
982+
// general case: decode the bytes with __utf8Encoding
return __utf8Encoding.GetString(buffer, index, count);
983+
}
984+
948985
private void EnsureDataAvailable(int needed)
949986
{
950987
if (_length - _position < needed)
@@ -977,63 +1014,30 @@ private void EnsureSpaceAvailable(int needed)
9771014
}
9781015
}
9791016

980-
private static int FindNull(
981-
BsonTrie bsonTrie,
982-
ref BsonTrieNode bsonTrieNode,
1017+
private static int IndexOfNull<TValue>(
9831018
byte[] buffer,
9841019
int index,
985-
int count)
1020+
int count,
1021+
ref BsonTrieNode<TValue> bsonTrieNode)
9861022
{
987-
while (count > 0)
1023+
for (; count > 0; index++, count--)
9881024
{
1025+
// bsonTrieNode might be null on entry or it might become null while navigating the trie
9891026
if (bsonTrieNode == null)
9901027
{
9911028
return Array.IndexOf<byte>(buffer, 0, index, count);
9921029
}
9931030

994-
var c = buffer[index];
995-
996-
if (c == 0)
1031+
var keyByte = buffer[index];
1032+
if (keyByte == 0)
9971033
{
998-
if (!bsonTrieNode.HasValue)
999-
{
1000-
bsonTrieNode = null;
1001-
}
1002-
10031034
return index;
10041035
}
10051036

1006-
bsonTrieNode = bsonTrie.GetNext(
1007-
bsonTrieNode,
1008-
c);
1009-
1010-
++index;
1011-
1012-
--count;
1037+
bsonTrieNode = bsonTrieNode.GetChild(keyByte); // might return null
10131038
}
10141039

10151040
return -1;
10161041
}
1017-
1018-
private string GetSingleChunkString(int length)
1019-
{
1020-
switch (length)
1021-
{
1022-
// special case empty strings
1023-
case 0:
1024-
return string.Empty;
1025-
1026-
// special case single character strings
1027-
case 1:
1028-
int tableIndex = _chunk[_chunkOffset] - 32;
1029-
if ((uint)tableIndex < __asciiStringTable.Length)
1030-
{
1031-
return __asciiStringTable[tableIndex];
1032-
}
1033-
break;
1034-
}
1035-
1036-
return __utf8Encoding.GetString(_chunk, _chunkOffset, length);
1037-
}
10381042
}
10391043
}

Bson/IO/BsonDocumentReader.cs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,15 @@ public override bool ReadBoolean()
103103
/// <summary>
104104
/// Reads a BsonType from the reader.
105105
/// </summary>
106+
/// <param name="bsonTrie">An optional trie to search for a value that matches the next element name.</param>
107+
/// <param name="found">Set to true if a matching value was found in the trie.</param>
108+
/// <param name="value">Set to the matching value found in the trie or default(TValue) if no matching value was found.</param>
106109
/// <returns>A BsonType.</returns>
107-
public override BsonType ReadBsonType(BsonTrie bsonTrie)
110+
public override BsonType ReadBsonType<TValue>(BsonTrie<TValue> bsonTrie, out bool found, out TValue value)
108111
{
109112
if (Disposed) { ThrowObjectDisposedException(); }
113+
found = false;
114+
value = default(TValue);
110115
if (State == BsonReaderState.Initial || State == BsonReaderState.ScopeDocument)
111116
{
112117
// there is an implied type of Document for the top level and for scope documents
@@ -137,16 +142,11 @@ public override BsonType ReadBsonType(BsonTrie bsonTrie)
137142
State = BsonReaderState.EndOfDocument;
138143
return BsonType.EndOfDocument;
139144
}
140-
object currentName;
141-
if (bsonTrie != null &&
142-
bsonTrie.TryGetValue(currentElement.Name, out currentName))
145+
if (bsonTrie != null)
143146
{
144-
CurrentName = currentName;
145-
}
146-
else
147-
{
148-
CurrentName = currentElement.Name;
147+
found = bsonTrie.TryGetValue(currentElement.Name, out value);
149148
}
149+
CurrentName = currentElement.Name;
150150
_currentValue = currentElement.Value;
151151
State = BsonReaderState.Name;
152152
break;

Bson/IO/BsonDocumentReaderBookmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class BsonDocumentReaderBookmark : BsonReaderBookmark
3333
internal BsonDocumentReaderBookmark(
3434
BsonReaderState state,
3535
BsonType currentBsonType,
36-
object currentName,
36+
string currentName,
3737
BsonDocumentReaderContext context,
3838
BsonValue currentValue)
3939
: base(state, currentBsonType, currentName)

Bson/IO/BsonReader.cs

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public abstract class BsonReader : IDisposable
3131
private BsonReaderSettings _settings;
3232
private BsonReaderState _state;
3333
private BsonType _currentBsonType;
34-
private object _currentName;
34+
private string _currentName;
3535

3636
// constructors
3737
/// <summary>
@@ -54,15 +54,6 @@ public BsonType CurrentBsonType
5454
protected set { _currentBsonType = value; }
5555
}
5656

57-
/// <summary>
58-
/// Gets the current name.
59-
/// </summary>
60-
public object CurrentName
61-
{
62-
get { return _currentName; }
63-
protected set { _currentName = value; }
64-
}
65-
6657
/// <summary>
6758
/// Gets the settings of the reader.
6859
/// </summary>
@@ -81,6 +72,15 @@ public BsonReaderState State
8172
}
8273

8374
// protected properties
75+
/// <summary>
76+
/// Gets or sets the current name.
77+
/// </summary>
78+
protected string CurrentName
79+
{
80+
get { return _currentName; }
81+
set { _currentName = value; }
82+
}
83+
8484
/// <summary>
8585
/// Gets whether the BsonReader has been disposed.
8686
/// </summary>
@@ -364,17 +364,25 @@ public bool ReadBoolean(string name)
364364
return ReadBoolean();
365365
}
366366

367+
/// <summary>
368+
/// Reads a BsonType from the reader.
369+
/// </summary>
367370
/// <returns>A BsonType.</returns>
368371
public BsonType ReadBsonType()
369372
{
370-
return this.ReadBsonType(null);
373+
bool found;
374+
object value;
375+
return ReadBsonType(null, out found, out value);
371376
}
372377

373378
/// <summary>
374379
/// Reads a BsonType from the reader.
375380
/// </summary>
381+
/// <param name="bsonTrie">An optional trie to search for a value that matches the next element name.</param>
382+
/// <param name="found">Set to true if a matching value was found in the trie.</param>
383+
/// <param name="value">Set to the matching value found in the trie or default(TValue) if no matching value was found.</param>
376384
/// <returns>A BsonType.</returns>
377-
public abstract BsonType ReadBsonType(BsonTrie bsonTrie);
385+
public abstract BsonType ReadBsonType<TValue>(BsonTrie<TValue> bsonTrie, out bool found, out TValue value);
378386

379387
/// <summary>
380388
/// Reads a BSON DateTime from the reader.
@@ -535,7 +543,7 @@ public string ReadName()
535543
}
536544

537545
_state = BsonReaderState.Value;
538-
return _currentName as string;
546+
return _currentName;
539547
}
540548

541549
/// <summary>

0 commit comments

Comments
 (0)