Skip to content

Commit 62c5966

Browse files
author
rstam
committed
Merged pull request 105 (optimizes reading strings of length 0 or 1) with some changes and added some new unit tests.
1 parent f42fb8d commit 62c5966

File tree

3 files changed

+190
-41
lines changed

3 files changed

+190
-41
lines changed

Bson/IO/BsonBuffer.cs

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
using System.IO;
1919
using System.Linq;
2020
using System.Text;
21-
using System.Threading;
22-
using System.Reflection;
2321

2422
namespace MongoDB.Bson.IO
2523
{
@@ -144,6 +142,18 @@ public int Position
144142
}
145143

146144
// private static methods
145+
/// <summary>
/// Builds the lookup table of one-character strings for the 128 ASCII code points (0 through 127).
/// </summary>
/// <returns>
/// A 128-element array in which the entry at index <c>i</c> is the string made of the single character <c>(char)i</c>.
/// </returns>
private static string[] BuildAsciiStringTable()
{
    // Project each code point onto its one-character string; the index in the array equals the code point.
    return Enumerable
        .Range(0, 128)
        .Select(code => new string((char)code, 1))
        .ToArray();
}
156+
147157
private static byte[] GetChunk()
148158
{
149159
lock (__chunkPool)
@@ -508,48 +518,59 @@ public void ReadObjectId(out int timestamp, out int machine, out short pid, out
508518
/// <summary>
/// Reads a length-prefixed, null-terminated UTF-8 string from the buffer and advances past it.
/// </summary>
/// <returns>The decoded string, without its null terminator.</returns>
/// <exception cref="ObjectDisposedException">The buffer has been disposed.</exception>
/// <exception cref="FileFormatException">
/// The length prefix is less than 1, or the last byte of the string is not a null terminator.
/// </exception>
/// <remarks>
/// Invalid UTF-8 bytes are left to the decoder to report; the unit tests for this method expect a
/// <see cref="DecoderFallbackException"/> in that case.
/// </remarks>
public string ReadString()
{
    if (_disposed) { throw new ObjectDisposedException("BsonBuffer"); }
    var length = ReadInt32(); // length including the null terminator
    if (length <= 0)
    {
        var message = string.Format("Invalid string length: {0} (the length includes the null terminator so it must be greater than or equal to 1).", length);
        throw new FileFormatException(message);
    }
    EnsureDataAvailable(length);

    string value;
    if (__chunkSize - _chunkOffset >= length)
    {
        // the whole string (terminator included) lies inside the current chunk
        if (_chunk[_chunkOffset + length - 1] != 0)
        {
            throw new FileFormatException("String is missing null terminator.");
        }

        switch (length)
        {
            // special case empty strings
            case 1:
                value = string.Empty;
                break;

            // special case single character strings
            case 2:
                var c = _chunk[_chunkOffset];
                if (c < 128)
                {
                    // every ASCII byte (0..127) has a preallocated entry in the 128-element table
                    value = __asciiStringTable[c];
                }
                else
                {
                    value = __utf8Encoding.GetString(_chunk, _chunkOffset, 1); // let GetString throw a DecoderFallbackException
                }
                break;

            default:
                value = __utf8Encoding.GetString(_chunk, _chunkOffset, length - 1); // don't decode the null terminator
                break;
        }

        // skip the string bytes and the null terminator in one step
        Position += length;
    }
    else
    {
        // straddles chunk boundary
        var bytes = ReadBytes(length); // read the null terminator also
        if (bytes[length - 1] != 0)
        {
            throw new FileFormatException("String is missing null terminator.");
        }
        value = __utf8Encoding.GetString(bytes, 0, length - 1); // don't decode the null terminator
    }

    return value;
}
555576

@@ -573,12 +594,12 @@ public string ReadCString()
573594

574595
if (partialCount > 0)
575596
{
576-
var c = _chunk[_chunkOffset];
597+
var c1 = _chunk[_chunkOffset];
577598

578599
// special case empty strings
579-
if (c == 0)
600+
if (c1 == 0)
580601
{
581-
++Position;
602+
Position += 1;
582603
return string.Empty;
583604
}
584605

@@ -587,17 +608,21 @@ public string ReadCString()
587608
// special case single character strings
588609
if (_chunk[_chunkOffset + 1] == 0)
589610
{
590-
if (c >= 128)
611+
string value;
612+
if (c1 < 127)
591613
{
592-
// multiple bytes required
593-
throw new DecoderFallbackException("[" + c.ToString("X2") + "] is an invalid character");
614+
value = __asciiStringTable[c1];
615+
}
616+
else
617+
{
618+
value = __utf8Encoding.GetString(_chunk, _chunkOffset, 1); // let GetString throw a DecoderFallbackException
594619
}
595620
Position += 2;
596-
return __asciiStringTable[c];
621+
return value;
597622
}
598623

599624
var index = Array.IndexOf<byte>(_chunk, 0, _chunkOffset + 2, partialCount - 2);
600-
if (index >= 0)
625+
if (index != -1)
601626
{
602627
var stringLength = index - _chunkOffset;
603628
var value = __utf8Encoding.GetString(_chunk, _chunkOffset, stringLength);
@@ -952,19 +977,6 @@ public void WriteZero()
952977
}
953978
}
954979

955-
// private static methods
956-
private static string[] BuildAsciiStringTable()
957-
{
958-
var asciiStringTable = new string[128];
959-
960-
for (int i = 0; i < 128; ++i)
961-
{
962-
asciiStringTable[i] = new string((char)i, 1);
963-
}
964-
965-
return asciiStringTable;
966-
}
967-
968980
// private methods
969981
private void EnsureDataAvailable(int needed)
970982
{

BsonUnitTests/BsonUnitTests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
<Compile Include="DefaultSerializer\Serializers\NullableTypeSerializerTests.cs" />
110110
<Compile Include="DefaultSerializer\Serializers\DiscriminatorTests.cs" />
111111
<Compile Include="DefaultSerializer\Serializers\StructSerializerTests.cs" />
112+
<Compile Include="IO\BsonBufferTests.cs" />
112113
<Compile Include="IO\BsonBufferValueStraddlesChunksTests.cs" />
113114
<Compile Include="IO\BsonDocumentReaderTests.cs" />
114115
<Compile Include="IO\BsonDocumentWriterTests.cs" />

BsonUnitTests/IO/BsonBufferTests.cs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/* Copyright 2010-2012 10gen Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
using System;
17+
using System.Collections.Generic;
18+
using System.IO;
19+
using System.Linq;
20+
using System.Text;
21+
using NUnit.Framework;
22+
23+
using MongoDB.Bson;
24+
using MongoDB.Bson.IO;
25+
using MongoDB.Bson.Serialization;
26+
27+
namespace MongoDB.BsonUnitTests.IO
28+
{
29+
/// <summary>
/// Exercises the string and C-string reading paths of BsonBuffer by deserializing small hand-built BSON documents.
/// </summary>
[TestFixture]
public class BsonBufferTests
{
    // element type bytes used in the hand-built documents below
    private const byte BooleanType = (byte)BsonType.Boolean;
    private const byte StringType = (byte)BsonType.String;

    // Deserializes a complete BSON document from its raw bytes.
    private static BsonDocument Parse(byte[] encoded)
    {
        return BsonSerializer.Deserialize<BsonDocument>(encoded);
    }

    // Asserts that deserializing the bytes throws TException and returns the caught exception.
    private static TException ParseExpecting<TException>(byte[] encoded) where TException : Exception
    {
        return Assert.Throws<TException>(() => Parse(encoded));
    }

    [Test]
    public void TestReadCStringEmpty()
    {
        // boolean element whose name is the empty C-string
        byte[] encoded = { 8, 0, 0, 0, BooleanType, 0, 0, 0 };
        Assert.AreEqual(8, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("", parsed.GetElement(0).Name);
    }

    [Test]
    public void TestReadCStringOneCharacter()
    {
        byte[] encoded = { 9, 0, 0, 0, BooleanType, (byte)'b', 0, 0, 0 };
        Assert.AreEqual(9, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("b", parsed.GetElement(0).Name);
    }

    [Test]
    public void TestReadCStringOneCharacterDecoderException()
    {
        // 0x80 on its own is not a valid UTF-8 sequence
        byte[] encoded = { 9, 0, 0, 0, BooleanType, 0x80, 0, 0, 0 };
        Assert.AreEqual(9, encoded.Length);

        ParseExpecting<DecoderFallbackException>(encoded);
    }

    [Test]
    public void TestReadCStringTwoCharacters()
    {
        byte[] encoded = { 10, 0, 0, 0, BooleanType, (byte)'b', (byte)'b', 0, 0, 0 };
        Assert.AreEqual(10, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("bb", parsed.GetElement(0).Name);
    }

    [Test]
    public void TestReadCStringTwoCharactersDecoderException()
    {
        byte[] encoded = { 10, 0, 0, 0, BooleanType, (byte)'b', 0x80, 0, 0, 0 };
        Assert.AreEqual(10, encoded.Length);

        ParseExpecting<DecoderFallbackException>(encoded);
    }

    [Test]
    public void TestReadStringEmpty()
    {
        // string element "s" with length prefix 1 (just the null terminator)
        byte[] encoded = { 13, 0, 0, 0, StringType, (byte)'s', 0, 1, 0, 0, 0, 0, 0 };
        Assert.AreEqual(13, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("", parsed["s"].AsString);
    }

    [Test]
    public void TestReadStringInvalidLength()
    {
        // length prefix 0 leaves no room for the null terminator
        byte[] encoded = { 13, 0, 0, 0, StringType, (byte)'s', 0, 0, 0, 0, 0, 0, 0 };
        Assert.AreEqual(13, encoded.Length);

        var thrown = ParseExpecting<FileFormatException>(encoded);

        Assert.AreEqual("Invalid string length: 0 (the length includes the null terminator so it must be greater than or equal to 1).", thrown.Message);
    }

    [Test]
    public void TestReadStringMissingNullTerminator()
    {
        // the byte where the terminator belongs is 123 instead of 0
        byte[] encoded = { 13, 0, 0, 0, StringType, (byte)'s', 0, 1, 0, 0, 0, 123, 0 };
        Assert.AreEqual(13, encoded.Length);

        var thrown = ParseExpecting<FileFormatException>(encoded);

        Assert.AreEqual("String is missing null terminator.", thrown.Message);
    }

    [Test]
    public void TestReadStringOneCharacter()
    {
        byte[] encoded = { 14, 0, 0, 0, StringType, (byte)'s', 0, 2, 0, 0, 0, (byte)'x', 0, 0 };
        Assert.AreEqual(14, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("x", parsed["s"].AsString);
    }

    [Test]
    public void TestReadStringOneCharacterDecoderException()
    {
        byte[] encoded = { 14, 0, 0, 0, StringType, (byte)'s', 0, 2, 0, 0, 0, 0x80, 0, 0 };
        Assert.AreEqual(14, encoded.Length);

        ParseExpecting<DecoderFallbackException>(encoded);
    }

    [Test]
    public void TestReadStringTwoCharacters()
    {
        byte[] encoded = { 15, 0, 0, 0, StringType, (byte)'s', 0, 3, 0, 0, 0, (byte)'x', (byte)'y', 0, 0 };
        Assert.AreEqual(15, encoded.Length);

        var parsed = Parse(encoded);

        Assert.AreEqual("xy", parsed["s"].AsString);
    }

    [Test]
    public void TestReadStringTwoCharactersDecoderException()
    {
        byte[] encoded = { 15, 0, 0, 0, StringType, (byte)'s', 0, 3, 0, 0, 0, (byte)'x', 0x80, 0, 0 };
        Assert.AreEqual(15, encoded.Length);

        ParseExpecting<DecoderFallbackException>(encoded);
    }
}
136+
}

0 commit comments

Comments
 (0)