@@ -745,14 +745,21 @@ bool IsUtf8()
745
745
}
746
746
}
747
747
748
+ private static readonly Encoding _iso88591ExceptionFallback = Encoding . GetEncoding ( "ISO-8859-1" , new EncoderExceptionFallback ( ) , new DecoderExceptionFallback ( ) ) ;
748
749
/// <summary>
/// Checks if the given string can be accurately represented and retrieved in ISO-8859-1 encoding.
/// </summary>
/// <remarks>
/// ISO-8859-1 maps exactly the code points U+0000 through U+00FF onto the byte values 0x00 through 0xFF,
/// so a string is representable if and only if none of its UTF-16 code units exceeds 0xFF.
/// Scanning the characters directly avoids throwing and catching an exception for ordinary
/// non-Latin-1 input, and performs no heap allocations.
/// </remarks>
/// <param name="input">The text to inspect.</param>
/// <returns><c>true</c> when every character of <paramref name="input"/> fits in ISO-8859-1; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
private static bool IsValidISO(string input)
{
    // Keep failing with ArgumentNullException on null input, as the encoder-based check did.
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    foreach (char c in input)
    {
        // Anything above U+00FF (including surrogate code units) has no ISO-8859-1 byte.
        if (c > '\u00FF')
        {
            return false;
        }
    }

    return true;
}
757
764
758
765
/// <summary>
@@ -866,18 +873,13 @@ private static BitArray PlainTextToBinaryAlphanumeric(string plainText)
866
873
return codeText ;
867
874
}
868
875
869
- /// <summary>
870
- /// Returns a string that contains the original string, with characters that cannot be encoded by a
871
- /// specified encoding (default of ISO-8859-2) with a replacement character.
872
- /// </summary>
873
- private static string ConvertToIso8859 ( string value , string Iso = "ISO-8859-2" )
874
- {
875
- Encoding iso = Encoding . GetEncoding ( Iso ) ;
876
- Encoding utf8 = Encoding . UTF8 ;
877
- byte [ ] utfBytes = utf8 . GetBytes ( value ) ;
878
- byte [ ] isoBytes = Encoding . Convert ( utf8 , iso , utfBytes ) ;
879
- return iso . GetString ( isoBytes ) ;
880
- }
876
/// <summary>
/// Shared ISO-8859-1 (Latin-1) encoding instance used for byte-mode encoding.
/// </summary>
#if NET5_0_OR_GREATER
private static readonly Encoding _iso8859_1 = Encoding.Latin1;
#else
private static readonly Encoding _iso8859_1 = Encoding.GetEncoding("ISO-8859-1");
#endif

/// <summary>
/// ISO-8859-2 encoding instance. Left <c>null</c> here and assigned on first use,
/// because looking the encoding up can fail on runtimes where the code page is not registered.
/// </summary>
private static Encoding _iso8859_2;
881
883
882
884
/// <summary>
883
885
/// Converts plain text into a binary format using byte mode encoding, which supports various character encodings through ECI (Extended Channel Interpretations).
@@ -894,35 +896,69 @@ private static string ConvertToIso8859(string value, string Iso = "ISO-8859-2")
894
896
/// </remarks>
895
897
private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode, bool utf8BOM, bool forceUtf8)
{
    Encoding targetEncoding;

    // Use ISO-8859-1 when UTF-8 is not forced and the text fits in it.
    // The flag is tested first so the full-string scan in IsValidISO is skipped
    // whenever forceUtf8 already decides the outcome.
    if (!forceUtf8 && IsValidISO(plainText))
    {
        targetEncoding = _iso8859_1;
        utf8BOM = false; // a UTF-8 preamble is only meaningful for UTF-8 payloads
    }
    else
    {
        // Determine the encoding based on the specified ECI mode.
        switch (eciMode)
        {
            case EciMode.Iso8859_1:
                // Encode as ISO-8859-1.
                targetEncoding = _iso8859_1;
                utf8BOM = false;
                break;
            case EciMode.Iso8859_2:
                // Note: ISO-8859-2 is not natively supported on .NET Core
                //
                // Users must install the System.Text.Encoding.CodePages package and call Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)
                // before using this encoding mode.
                //
                // The lookup is deferred to first use so that only callers requesting this mode can hit a lookup failure.
                if (_iso8859_2 == null)
                {
                    _iso8859_2 = Encoding.GetEncoding("ISO-8859-2");
                }

                // Encode as ISO-8859-2.
                targetEncoding = _iso8859_2;
                utf8BOM = false;
                break;
            case EciMode.Default:
            case EciMode.Utf8:
            default:
                // UTF-8; utf8BOM is left as passed in so the preamble can be added below.
                targetEncoding = Encoding.UTF8;
                break;
        }
    }

#if NET5_0_OR_GREATER
    // In .NET 5.0 and later, small payloads are encoded into a stack buffer to prevent heap allocations.
    const int StackallocThreshold = 2000;
    int count = targetEncoding.GetByteCount(plainText);
    Span<byte> codeBytes = count < StackallocThreshold ? stackalloc byte[count] : new byte[count];
    targetEncoding.GetBytes(plainText, codeBytes);
#else
    byte[] codeBytes = targetEncoding.GetBytes(plainText);
#endif

    // Convert the encoded bytes into a BitArray.
    if (!utf8BOM)
    {
        return ToBitArray(codeBytes);
    }

    // Convert to bit array, leaving 24 bits at the front for the UTF-8 preamble.
    var bitArray = ToBitArray(codeBytes, 24);

    // Write the UTF-8 preamble (EF BB BF) into the reserved bits.
    DecToBin(0xEF, 8, bitArray, 0);
    DecToBin(0xBB, 8, bitArray, 8);
    DecToBin(0xBF, 8, bitArray, 16);
    return bitArray;
}
927
963
928
964
/// <summary>
@@ -932,7 +968,13 @@ private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode,
932
968
/// <param name="byteArray">The byte array to convert into a BitArray.</param>
933
969
/// <param name="prefixZeros">The number of leading zeros to prepend to the resulting BitArray.</param>
934
970
/// <returns>A BitArray representing the bits of the input byteArray, with optional leading zeros.</returns>
935
- private static BitArray ToBitArray ( byte [ ] byteArray , int prefixZeros = 0 )
971
+ private static BitArray ToBitArray (
972
+ #if NET5_0_OR_GREATER
973
+ ReadOnlySpan < byte > byteArray ,
974
+ #else
975
+ byte [ ] byteArray ,
976
+ #endif
977
+ int prefixZeros = 0 )
936
978
{
937
979
// Calculate the total number of bits in the resulting BitArray including the prefix zeros.
938
980
var bitArray = new BitArray ( ( int ) ( ( uint ) byteArray . Length * 8 ) + prefixZeros ) ;
0 commit comments