Improvements in unit tests

josesimoes · josesimoes · commit 4249524dddcd · 2025-12-09T18:26:41.000Z
- Move encoding unit tests to a separate project (they were in the StringBuilder one, which was hindering discovery). - Add new encoding/decoding unit tests (following #237)
diff --git a/Tests/NFUnitTestEncoding/EncodingTests.cs b/Tests/NFUnitTestEncoding/EncodingTests.cs
@@ -275,5 +275,233 @@ public void Utf8EncodingTests_TestFullASCIIRange()
          byte[] reencoded = Encoding.UTF8.GetBytes(decoded);
          RoundtripUtf8(reencoded, input, 127);
      }
+
+     // NEW TESTS FOR FIXES
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestIncrementalDecodingExactBuffer()
+     {
+         // Regression check: the destination holds exactly one char, so there is
+         // no spare slot for a terminator (the iMaxChars=1, outputUTF16_size=1 case).
+         byte[] source = Encoding.UTF8.GetBytes("AB");
+         char[] destination = new char[1];
+
+         int consumedBytes;
+         int producedChars;
+         bool isComplete;
+
+         // Feed only the first byte into the single-char destination.
+         var utf8Decoder = Encoding.UTF8.GetDecoder();
+         utf8Decoder.Convert(source, 0, 1, destination, 0, 1, false, out consumedBytes, out producedChars, out isComplete);
+
+         Assert.AreEqual(1, consumedBytes);
+         Assert.AreEqual(1, producedChars);
+         Assert.AreEqual('A', destination[0]);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestInvalidSurrogatePairHandling()
+     {
+         // UTF-16 -> UTF-8 with a broken pair: a lone high surrogate (0xD800)
+         // is immediately followed by ordinary characters. The surrogate must
+         // turn into U+FFFD while 'A' and 'B' pass through untouched.
+         string malformed = new string(new char[] { (char)0xD800, 'A', 'B' });
+
+         byte[] actual = Encoding.UTF8.GetBytes(malformed);
+         byte[] wanted = new byte[]
+         {
+             0xEF, 0xBF, 0xBD, // U+FFFD
+             0x41,             // 'A'
+             0x42              // 'B'
+         };
+         CollectionAssert.AreEqual(wanted, actual);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestInvalidSurrogatePairMiddle()
+     {
+         // Two high surrogates back to back, wrapped by 'A' and 'B'.
+         string malformed = new string(new char[] { 'A', (char)0xD800, (char)0xD801, 'B' });
+
+         // Each surrogate is expected to become its own U+FFFD (EF BF BD),
+         // with 'A' (0x41) and 'B' (0x42) kept on either side.
+         byte[] wanted = new byte[] { 0x41, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0x42 };
+         byte[] actual = Encoding.UTF8.GetBytes(malformed);
+
+         CollectionAssert.AreEqual(wanted, actual);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestUnpairedLowSurrogate()
+     {
+         // A low surrogate (0xDC00) that has no high surrogate in front of it.
+         char[] utf16 = new char[3];
+         utf16[0] = 'A';
+         utf16[1] = (char)0xDC00;
+         utf16[2] = 'B';
+
+         // Wanted bytes: 'A' (0x41), U+FFFD (EF BF BD), 'B' (0x42).
+         byte[] actual = Encoding.UTF8.GetBytes(new string(utf16));
+         CollectionAssert.AreEqual(new byte[] { 0x41, 0xEF, 0xBF, 0xBD, 0x42 }, actual);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestUnpairedHighSurrogateAtEnd()
+     {
+         // The input stops right after a high surrogate, so it can never be paired.
+         string truncated = new string(new char[] { 'A', 'B', (char)0xD800 });
+
+         byte[] actual = Encoding.UTF8.GetBytes(truncated);
+
+         // 'A' (0x41) and 'B' (0x42) come first, then U+FFFD (EF BF BD)
+         // standing in for the dangling surrogate.
+         byte[] wanted = new byte[] { 0x41, 0x42, 0xEF, 0xBF, 0xBD };
+         CollectionAssert.AreEqual(wanted, actual);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestPartial2ByteSequence()
+     {
+         // 0xC2 opens a 2-byte sequence, but the input ends before its continuation byte.
+         byte[] truncated = new byte[] { 0x41, 0xC2 };
+         byte[] replaced = new byte[] { 0x41, 0xEF, 0xBF, 0xBD }; // 'A' + U+FFFD
+         RoundtripUtf8(truncated, replaced, 2);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestPartial3ByteSequence()
+     {
+         // 0xE2 0x82 is a 3-byte sequence that is missing its final continuation byte.
+         byte[] truncated = new byte[] { 0x41, 0xE2, 0x82 };
+         byte[] replaced = new byte[] { 0x41, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD }; // 'A' + 2x U+FFFD
+         RoundtripUtf8(truncated, replaced, 3);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestPartial4ByteSequence()
+     {
+         // 0xF0 0x9F 0x98 is a 4-byte sequence cut off before its last continuation byte.
+         byte[] truncated = new byte[] { 0x41, 0xF0, 0x9F, 0x98 };
+         byte[] replaced = new byte[] { 0x41, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD }; // 'A' + 3x U+FFFD
+         RoundtripUtf8(truncated, replaced, 4);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestMixedValidAndInvalidSequences()
+     {
+         byte[] input = new byte[] // well-formed and malformed sequences interleaved
+         {
+             0x41,                   // 'A' - valid ASCII
+             0xC2, 0xA9,             // © - valid 2-byte
+             0xE2, 0x82,             // incomplete 3-byte
+             0x42,                   // 'B' - valid ASCII
+             0xF0, 0x9F, 0x98, 0x80, // 😀 - valid 4-byte
+             0xED, 0xA0, 0x80        // invalid surrogate
+         };
+         string decoded = Encoding.UTF8.GetString(input, 0, input.Length);
+         Assert.IsNotNull(decoded);
+         Assert.IsTrue(decoded.Contains("A"));
+         Assert.IsTrue(decoded.Contains("©"));
+         Assert.IsTrue(decoded.Contains("B"));
+         Assert.IsTrue(decoded.Contains("😀"));      // the valid 4-byte sequence must survive too
+         Assert.IsTrue(decoded.Contains("\uFFFD")); // malformed input must surface as U+FFFD
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestValidSurrogatePair()
+     {
+         // A well-formed surrogate pair: 😀 (U+1F600) has to encode as
+         // F0 9F 98 80 and decode back to the very same string.
+         string emoji = "😀";
+
+         byte[] utf8 = Encoding.UTF8.GetBytes(emoji);
+         CollectionAssert.AreEqual(new byte[] { 0xF0, 0x9F, 0x98, 0x80 }, utf8);
+
+         string roundTripped = Encoding.UTF8.GetString(utf8, 0, utf8.Length);
+
+         Assert.AreEqual(emoji, roundTripped);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestMultipleSurrogatePairs()
+     {
+         // Three consecutive emoji, i.e. three surrogate pairs in a row,
+         // must come back unchanged after an encode/decode round trip.
+         string original = "😀😁😂";
+         byte[] utf8 = Encoding.UTF8.GetBytes(original);
+         Assert.AreEqual(original, Encoding.UTF8.GetString(utf8, 0, utf8.Length));
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestIncrementalDecodingMultiByte()
+     {
+         // Decode a complete 3-byte sequence (€) into a one-char buffer.
+         byte[] euroBytes = new byte[] { 0xE2, 0x82, 0xAC };
+         char[] destination = new char[1];
+         int consumedBytes;
+         int producedChars;
+         bool isComplete;
+
+         var utf8Decoder = Encoding.UTF8.GetDecoder();
+         utf8Decoder.Convert(euroBytes, 0, 3, destination, 0, 1, false, out consumedBytes, out producedChars, out isComplete);
+
+         Assert.AreEqual(3, consumedBytes);
+         Assert.AreEqual(1, producedChars);
+         Assert.AreEqual('€', destination[0]);
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestOverlongEncodingRejection()
+     {
+         // C1 81 is an overlong form of 'A' (which should be 0x41); it has to be
+         // rejected, so the decoded text must contain U+FFFD instead.
+         byte[] overlong = new byte[] { 0xC1, 0x81 };
+
+         string decoded = Encoding.UTF8.GetString(overlong, 0, overlong.Length);
+         byte[] reencoded = Encoding.UTF8.GetBytes(decoded);
+
+         Assert.AreNotEqual(overlong.Length, reencoded.Length);
+         Assert.IsTrue(decoded.Contains("\uFFFD"));
+     }
+
+     [TestMethod]
+     public void Utf8EncodingTests_TestSequentialInvalidBytes()
+     {
+         // Several invalid bytes in a row: each one is expected to turn into
+         // exactly one replacement character (EF BF BD once re-encoded).
+         byte[] garbage = new byte[] { 0xFE, 0xFF, 0xFE };
+
+         string decoded = Encoding.UTF8.GetString(garbage, 0, garbage.Length);
+         byte[] threeReplacements = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };
+
+         CollectionAssert.AreEqual(threeReplacements, Encoding.UTF8.GetBytes(decoded));
+         Assert.AreEqual(3, decoded.Length);
+     }
+
+   [TestMethod]
+     public void Utf8EncodingTests_TestBoundaryCodepoints()
+     {
+         // First and last code points at the boundaries between the
+         // 1-, 2- and 3-byte UTF-8 sequence lengths, checked in ascending order.
+         byte[][] boundaries = new byte[][]
+         {
+             // U+007F - last 1-byte character
+             new byte[] { 0x7F },
+             // U+0080 - first 2-byte character
+             new byte[] { 0xC2, 0x80 },
+             // U+07FF - last 2-byte character
+             new byte[] { 0xDF, 0xBF },
+             // U+0800 - first 3-byte character
+             new byte[] { 0xE0, 0xA0, 0x80 },
+             // U+FFFF - last 3-byte character (excluding surrogates)
+             new byte[] { 0xEF, 0xBF, 0xBF }
+         };
+
+         // Each sequence is handed to RoundtripUtf8 as both input and expected output.
+         foreach (byte[] sequence in boundaries)
+         {
+             RoundtripUtf8(sequence, sequence, 1);
+         }
+     }
  }
 }
diff --git a/Tests/NFUnitTestEncoding/NFUnitTestEncoding.nfproj b/Tests/NFUnitTestEncoding/NFUnitTestEncoding.nfproj
@@ -0,0 +1,52 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="Current" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Label="Globals">
+    <NanoFrameworkProjectSystemPath>$(MSBuildExtensionsPath)\nanoFramework\v1.0\</NanoFrameworkProjectSystemPath>
+  </PropertyGroup>
+  <Import Project="$(NanoFrameworkProjectSystemPath)NFProjectSystem.Default.props" Condition="Exists('$(NanoFrameworkProjectSystemPath)NFProjectSystem.Default.props')" />
+  <ItemGroup>
+    <ProjectCapability Include="TestContainer" />
+  </ItemGroup>
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectTypeGuids>{11A8DD76-328B-46DF-9F39-F559912D0360};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <ProjectGuid>5cc54ee8-8145-4b94-adf0-cd0879fcdf7d</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <FileAlignment>512</FileAlignment>
+    <RootNamespace>NFUnitTestEncoding</RootNamespace>
+    <AssemblyName>NFUnitTest</AssemblyName>
+    <IsCodedUITest>False</IsCodedUITest>
+    <IsTestProject>true</IsTestProject>
+    <TestProjectType>UnitTest</TestProjectType>
+    <TargetFrameworkVersion>v1.0</TargetFrameworkVersion>
+  </PropertyGroup>
+  <Import Project="$(NanoFrameworkProjectSystemPath)NFProjectSystem.props" Condition="Exists('$(NanoFrameworkProjectSystemPath)NFProjectSystem.props')" />
+  <PropertyGroup>
+    <RunSettingsFilePath>$(MSBuildProjectDirectory)\nano.runsettings</RunSettingsFilePath>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="EncodingTests.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <Reference Include="mscorlib">
+      <HintPath>..\..\packages\nanoFramework.CoreLibrary.1.17.11\lib\mscorlib.dll</HintPath>
+    </Reference>
+    <Reference Include="nanoFramework.TestFramework">
+      <HintPath>..\..\packages\nanoFramework.TestFramework.3.0.77\lib\nanoFramework.TestFramework.dll</HintPath>
+    </Reference>
+    <Reference Include="nanoFramework.UnitTestLauncher">
+      <HintPath>..\..\packages\nanoFramework.TestFramework.3.0.77\lib\nanoFramework.UnitTestLauncher.exe</HintPath>
+    </Reference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="nano.runsettings" />
+    <None Include="packages.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\nanoFramework.System.Text\nanoFramework.System.Text.nfproj" />
+  </ItemGroup>
+  <Import Project="$(NanoFrameworkProjectSystemPath)NFProjectSystem.CSharp.targets" Condition="Exists('$(NanoFrameworkProjectSystemPath)NFProjectSystem.CSharp.targets')" />
+</Project>
diff --git a/Tests/NFUnitTestEncoding/Properties/AssemblyInfo.cs b/Tests/NFUnitTestEncoding/Properties/AssemblyInfo.cs
@@ -0,0 +1,31 @@
+﻿using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// Descriptive metadata for this assembly is supplied by the attributes
+// below; edit their values to change the information that is
+// associated with the assembly.
+[assembly: AssemblyCopyright("Copyright (c) 2021 nanoFramework contributors")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// ComVisible(false) hides the types of this assembly from COM
+// components. To expose a single type to COM, apply
+// ComVisible(true) to that type.
+[assembly: ComVisible(false)]
+
+// An assembly version is made of the following four values:
+//
+//      Major Version
+//      Minor Version
+//      Build Number
+//      Revision
+//
+// Either spell out all four, or let the Build and Revision numbers be
+// defaulted by writing '*' in their place, for example:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
+[assembly: AssemblyVersion("1.0.0.0")]
diff --git a/Tests/NFUnitTestEncoding/nano.runsettings b/Tests/NFUnitTestEncoding/nano.runsettings
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<RunSettings>
+   <!-- Configurations that affect the Test Framework -->
+   <RunConfiguration>
+       <ResultsDirectory>.\TestResults</ResultsDirectory><!-- Path relative to solution directory -->
+       <TestSessionTimeout>120000</TestSessionTimeout><!-- Milliseconds -->
+       <TargetFrameworkVersion>net48</TargetFrameworkVersion>
+       <TargetPlatform>x64</TargetPlatform>
+   </RunConfiguration>
+   <nanoFrameworkAdapter>
+       <Logging>None</Logging> <!--Set to the desired level of logging for Unit Test execution. Possible values are: None, Detailed, Verbose, Error. -->
+       <IsRealHardware>False</IsRealHardware><!--Set to true to run tests on real hardware. -->
+       <RealHardwarePort>COM3</RealHardwarePort><!--Specify the COM port to use to connect to a nanoDevice. If none is specified, a device detection is performed and the 1st available one will be used. -->
+       <CLRVersion></CLRVersion><!--Specify the nanoCLR version to use. If not specified, the latest available will be used. -->
+       <PathToLocalCLRInstance></PathToLocalCLRInstance><!--Specify the path to a local nanoCLR instance. If not specified, the default one installed with nanoclr CLR will be used. -->
+   </nanoFrameworkAdapter>
+</RunSettings>
diff --git a/Tests/NFUnitTestEncoding/packages.config b/Tests/NFUnitTestEncoding/packages.config
@@ -0,0 +1,5 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<packages>
+  <package id="nanoFramework.CoreLibrary" version="1.17.11" targetFramework="netnano1.0" />
+  <package id="nanoFramework.TestFramework" version="3.0.77" targetFramework="netnano1.0" developmentDependency="true" />
+</packages>
diff --git a/Tests/NFUnitTestEncoding/packages.lock.json b/Tests/NFUnitTestEncoding/packages.lock.json
@@ -0,0 +1,19 @@
+{
+  "version": 1,
+  "dependencies": {
+    ".NETnanoFramework,Version=v1.0": {
+      "nanoFramework.CoreLibrary": {
+        "type": "Direct",
+        "requested": "[1.17.11, 1.17.11]",
+        "resolved": "1.17.11",
+        "contentHash": "HezzAc0o2XrSGf85xSeD/6xsO6ohF9hX6/iMQ1IZS6Zw6umr4WfAN2Jv0BrPxkaYwzEegJxxZujkHoUIAqtOMw=="
+      },
+      "nanoFramework.TestFramework": {
+        "type": "Direct",
+        "requested": "[3.0.77, 3.0.77]",
+        "resolved": "3.0.77",
+        "contentHash": "Py5W1oN84KMBmOOHCzdz6pyi3bZTnQu9BoqIx0KGqkhG3V8kGoem/t+BuCM0pMIWAyl2iMP1n2S9624YXmBJZw=="
+      }
+    }
+  }
+}
diff --git a/Tests/NFUnitTestStringBuilder/NFUnitTestStringBuilder.nfproj b/Tests/NFUnitTestStringBuilder/NFUnitTestStringBuilder.nfproj
@@ -26,7 +26,6 @@
   </PropertyGroup>
   <Import Project="$(NanoFrameworkProjectSystemPath)NFProjectSystem.props" Condition="Exists('$(NanoFrameworkProjectSystemPath)NFProjectSystem.props')" />
   <ItemGroup>
-    <Compile Include="EncodingTests.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="StringBuilderTests.cs" />
   </ItemGroup>
diff --git a/nanoFramework.System.Text.sln b/nanoFramework.System.Text.sln