@@ -31,6 +31,8 @@ public class BsonBuffer : IDisposable
31
31
private static Stack < byte [ ] > __chunkPool = new Stack < byte [ ] > ( ) ;
32
32
private static int __maxChunkPoolSize = 64 ;
33
33
private const int __chunkSize = 16 * 1024 ; // 16KiB
34
+ private static readonly string [ ] __asciiStringTable = BuildAsciiStringTable ( ) ;
35
+ private static readonly UTF8Encoding __utf8Encoding = new UTF8Encoding ( false , true ) ; // throw on invalid bytes
34
36
private static readonly bool [ ] __validBsonTypes = new bool [ 256 ] ;
35
37
36
38
// private fields
@@ -506,17 +508,17 @@ public string ReadString()
506
508
{
507
509
if ( _disposed ) { throw new ObjectDisposedException ( "BsonBuffer" ) ; }
508
510
var length = ReadInt32 ( ) ;
509
- EnsureDataAvailable ( length + 1 ) ;
511
+ EnsureDataAvailable ( length ) ;
510
512
string value ;
511
513
if ( __chunkSize - _chunkOffset >= length - 1 )
512
514
{
513
- value = Encoding . UTF8 . GetString ( _chunk , _chunkOffset , length - 1 ) ;
515
+ value = ParseString ( _chunk , _chunkOffset , length - 1 ) ;
514
516
Position += length - 1 ;
515
517
}
516
518
else
517
519
{
518
520
// straddles chunk boundary
519
- value = Encoding . UTF8 . GetString ( ReadBytes ( length - 1 ) ) ;
521
+ value = __utf8Encoding . GetString ( ReadBytes ( length - 1 ) ) ;
520
522
}
521
523
byte terminator = ReadByte ( ) ;
522
524
if ( terminator != 0 )
@@ -543,11 +545,11 @@ public string ReadCString()
543
545
{
544
546
partialCount = _length - _position ; // populated part of last chunk
545
547
}
546
- var index = Array . IndexOf < byte > ( _chunk , 0 , _chunkOffset , partialCount ) ;
547
- if ( index != - 1 )
548
+
549
+ string value ;
550
+ var stringLength = TryParseCString ( _chunk , _chunkOffset , partialCount , out value ) ;
551
+ if ( stringLength >= 0 )
548
552
{
549
- var stringLength = index - _chunkOffset ;
550
- var value = Encoding . UTF8 . GetString ( _chunk , _chunkOffset , stringLength ) ;
551
553
Position += stringLength + 1 ;
552
554
return value ;
553
555
}
@@ -566,12 +568,12 @@ public string ReadCString()
566
568
{
567
569
partialCount = _length - localPosition ; // populated part of last chunk
568
570
}
569
- index = Array . IndexOf < byte > ( localChunk , 0 , 0 , partialCount ) ;
571
+ var index = Array . IndexOf < byte > ( localChunk , 0 , 0 , partialCount ) ;
570
572
if ( index != - 1 )
571
573
{
572
574
localPosition += index ;
573
- var stringLength = localPosition - _position ;
574
- var value = Encoding . UTF8 . GetString ( ReadBytes ( stringLength ) ) ; // ReadBytes advances over string
575
+ stringLength = localPosition - _position ;
576
+ value = __utf8Encoding . GetString ( ReadBytes ( stringLength ) ) ; // ReadBytes advances over string
575
577
Position += 1 ; // skip over null byte at end
576
578
return value ;
577
579
}
@@ -711,18 +713,18 @@ public void WriteBytes(byte[] value)
711
713
public void WriteCString ( string value )
712
714
{
713
715
if ( _disposed ) { throw new ObjectDisposedException ( "BsonBuffer" ) ; }
714
- int maxLength = Encoding . UTF8 . GetMaxByteCount ( value . Length ) + 1 ;
716
+ int maxLength = __utf8Encoding . GetMaxByteCount ( value . Length ) + 1 ;
715
717
EnsureSpaceAvailable ( maxLength ) ;
716
718
if ( __chunkSize - _chunkOffset >= maxLength )
717
719
{
718
- int length = Encoding . UTF8 . GetBytes ( value , 0 , value . Length , _chunk , _chunkOffset ) ;
720
+ int length = __utf8Encoding . GetBytes ( value , 0 , value . Length , _chunk , _chunkOffset ) ;
719
721
_chunk [ _chunkOffset + length ] = 0 ;
720
722
Position += length + 1 ;
721
723
}
722
724
else
723
725
{
724
726
// straddles chunk boundary
725
- byte [ ] bytes = Encoding . UTF8 . GetBytes ( value ) ;
727
+ byte [ ] bytes = __utf8Encoding . GetBytes ( value ) ;
726
728
WriteBytes ( bytes ) ;
727
729
WriteByte ( 0 ) ;
728
730
}
@@ -829,11 +831,11 @@ public void WriteObjectId(int timestamp, int machine, short pid, int increment)
829
831
public void WriteString ( string value )
830
832
{
831
833
if ( _disposed ) { throw new ObjectDisposedException ( "BsonBuffer" ) ; }
832
- int maxLength = Encoding . UTF8 . GetMaxByteCount ( value . Length ) + 5 ;
834
+ int maxLength = __utf8Encoding . GetMaxByteCount ( value . Length ) + 5 ;
833
835
EnsureSpaceAvailable ( maxLength ) ;
834
836
if ( __chunkSize - _chunkOffset >= maxLength )
835
837
{
836
- int length = Encoding . UTF8 . GetBytes ( value , 0 , value . Length , _chunk , _chunkOffset + 4 ) ; // write string first
838
+ int length = __utf8Encoding . GetBytes ( value , 0 , value . Length , _chunk , _chunkOffset + 4 ) ; // write string first
837
839
int lengthPlusOne = length + 1 ;
838
840
_chunk [ _chunkOffset + 0 ] = ( byte ) ( lengthPlusOne ) ; // now we know the length
839
841
_chunk [ _chunkOffset + 1 ] = ( byte ) ( lengthPlusOne >> 8 ) ;
@@ -845,7 +847,7 @@ public void WriteString(string value)
845
847
else
846
848
{
847
849
// straddles chunk boundary
848
- byte [ ] bytes = Encoding . UTF8 . GetBytes ( value ) ;
850
+ byte [ ] bytes = __utf8Encoding . GetBytes ( value ) ;
849
851
WriteInt32 ( bytes . Length + 1 ) ;
850
852
WriteBytes ( bytes ) ;
851
853
WriteByte ( 0 ) ;
@@ -897,6 +899,100 @@ public void WriteZero()
897
899
}
898
900
}
899
901
902
+ // private static methods
903
/// <summary>
/// Builds a table of one-character strings, one for each code point from 0 through 127.
/// </summary>
/// <returns>A 128-element array whose element i is the string consisting of the single character (char)i.</returns>
private static string[] BuildAsciiStringTable()
{
    const int entryCount = 128;
    var table = new string[entryCount];

    var codePoint = 0;
    while (codePoint < entryCount)
    {
        table[codePoint] = new string((char)codePoint, 1);
        codePoint++;
    }

    return table;
}
914
+
915
/// <summary>
/// Decodes a string from a range of bytes, short-circuiting the empty and single-byte cases
/// so that no decoder call is made for them.
/// </summary>
/// <param name="buffer">The byte array holding the encoded string.</param>
/// <param name="startIndex">The index in <paramref name="buffer"/> of the first byte of the string.</param>
/// <param name="stringLength">The number of bytes to decode.</param>
/// <returns>The decoded string.</returns>
/// <exception cref="DecoderFallbackException">
/// The string is a single byte whose value is 128 or greater (such a byte cannot stand alone).
/// </exception>
private static string ParseString(byte[] buffer, int startIndex, int stringLength)
{
    // empty string: nothing to decode
    if (stringLength == 0)
    {
        return string.Empty;
    }

    // single byte: serve the string from the prebuilt table
    if (stringLength == 1)
    {
        var onlyByte = buffer[startIndex];
        if (onlyByte < 128)
        {
            return __asciiStringTable[onlyByte];
        }

        // a byte this large is only valid as part of a multi-byte sequence
        throw new DecoderFallbackException("[" + onlyByte.ToString("X2") + "] is an invalid character");
    }

    // general case: hand the whole range to the shared encoding instance
    return __utf8Encoding.GetString(buffer, startIndex, stringLength);
}
936
+
937
/// <summary>
/// Attempts to read a null-terminated string that lies entirely within a range of a byte array.
/// Empty strings, single-byte strings and the string "_id" are recognized directly; anything
/// else is located by searching for the null terminator and then decoded.
/// </summary>
/// <param name="buffer">The byte array to read from.</param>
/// <param name="startIndex">The index in <paramref name="buffer"/> of the first byte of the string.</param>
/// <param name="length">The number of bytes, starting at <paramref name="startIndex"/>, that may be examined.</param>
/// <param name="value">The parsed string, or null when no null terminator is found within the range.</param>
/// <returns>The number of bytes parsed excluding the null terminator; -1 otherwise.</returns>
/// <exception cref="DecoderFallbackException">
/// The string is a single byte whose value is 128 or greater (such a byte cannot stand alone).
/// </exception>
private static int TryParseCString(byte[] buffer, int startIndex, int length, out string value)
{
    if (length >= 1)
    {
        var firstByte = buffer[startIndex];

        // terminator right away: the empty string
        if (firstByte == 0)
        {
            value = string.Empty;
            return 0;
        }

        if (length >= 2)
        {
            var secondByte = buffer[startIndex + 1];

            // terminator after one byte: a single character string served from the prebuilt table
            if (secondByte == 0)
            {
                if (firstByte >= 128)
                {
                    // a byte this large is only valid as part of a multi-byte sequence
                    throw new DecoderFallbackException("[" + firstByte.ToString("X2") + "] is an invalid character");
                }
                value = __asciiStringTable[firstByte];
                return 1;
            }

            // the exact bytes '_', 'i', 'd', '\0': return the literal without decoding
            var isIdString =
                length >= 4 &&
                firstByte == 0x5f &&              // '_'
                secondByte == 0x69 &&             // 'i'
                buffer[startIndex + 2] == 0x64 && // 'd'
                buffer[startIndex + 3] == 0;      // '\0'
            if (isIdString)
            {
                value = "_id";
                return 3;
            }

            // general case: the first two bytes are known to be non-zero, so search from the third
            var terminatorIndex = Array.IndexOf<byte>(buffer, 0, startIndex + 2, length - 2);
            if (terminatorIndex != -1)
            {
                var byteCount = terminatorIndex - startIndex;
                value = __utf8Encoding.GetString(buffer, startIndex, byteCount);
                return byteCount;
            }
        }
    }

    // no terminator inside the examined range
    value = null;
    return -1;
}
995
+
900
996
// private methods
901
997
private void EnsureDataAvailable ( int needed )
902
998
{
0 commit comments