Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 17112db

Browse files
GrabYourPitchforkswtgodbe
authored andcommitted
Fix EncoderNLS / DecoderNLS regression in reporting error index (#25397)
Also fixes incorrect asserts in the Encoding type
1 parent cf14314 commit 17112db

File tree

3 files changed

+23
-12
lines changed

3 files changed

+23
-12
lines changed

src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,9 +296,10 @@ internal int DrainLeftoverDataForGetCharCount(ReadOnlySpan<byte> bytes, out int
296296
break;
297297
}
298298

299-
// Couldn't decode the buffer. Fallback the buffer instead.
299+
// Couldn't decode the buffer. Fallback the buffer instead. See comment in DrainLeftoverDataForGetChars
300+
// for more information on why a negative index is provided.
300301

301-
if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0))
302+
if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: -_leftoverByteCount))
302303
{
303304
charCount = _fallbackBuffer!.DrainRemainingDataForGetCharCount();
304305
Debug.Assert(charCount >= 0, "Fallback buffer shouldn't have returned a negative char count.");
@@ -360,9 +361,13 @@ internal int DrainLeftoverDataForGetChars(ReadOnlySpan<byte> bytes, Span<char> c
360361
break;
361362
}
362363

363-
// Couldn't decode the buffer. Fallback the buffer instead.
364+
// Couldn't decode the buffer. Fallback the buffer instead. The fallback mechanism relies
365+
// on a negative index to convey "the start of the invalid sequence was some number of
366+
// bytes back before the current buffer." Since we know the invalid sequence must have
367+
// started at the beginning of our leftover byte buffer, we can signal to our caller that
368+
// they must backtrack that many bytes to find the real start of the invalid sequence.
364369

365-
if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0)
370+
if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: -_leftoverByteCount)
366371
&& !_fallbackBuffer!.TryDrainRemainingDataForGetChars(chars, out charsWritten))
367372
{
368373
goto DestinationTooSmall;

src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,18 @@ internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int
302302
}
303303
else
304304
{
305-
didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0);
305+
// The fallback mechanism relies on a negative index to convey "the start of the invalid
306+
// sequence was some number of chars back before the current buffer." In this block and
307+
// in the block immediately thereafter, we know we have a single leftover high surrogate
308+
// character from a previous operation, so we provide an index of -1 to convey that the
309+
// char immediately before the current buffer was the start of the invalid sequence.
310+
311+
didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1);
306312
}
307313
}
308314
else
309315
{
310-
didFallback = FallbackBuffer.Fallback(_charLeftOver, index: 0);
316+
didFallback = FallbackBuffer.Fallback(_charLeftOver, index: -1);
311317
}
312318

313319
// Now tally the number of bytes that would've been emitted as part of fallback.
@@ -367,7 +373,7 @@ internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byt
367373
break;
368374

369375
case OperationStatus.InvalidData:
370-
FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0);
376+
FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1); // see comment in DrainLeftoverDataForGetByteCount
371377
break;
372378

373379
default:
@@ -377,7 +383,7 @@ internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byt
377383
}
378384
else
379385
{
380-
FallbackBuffer.Fallback(_charLeftOver, index: 0);
386+
FallbackBuffer.Fallback(_charLeftOver, index: -1); // see comment in DrainLeftoverDataForGetByteCount
381387
}
382388
}
383389

src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ private protected unsafe int GetByteCountWithFallback(char* pCharsOriginal, int
257257
private unsafe int GetByteCountWithFallback(char* pOriginalChars, int originalCharCount, int charsConsumedSoFar, EncoderNLS encoder)
258258
{
259259
Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
260-
Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Caller should've checked this condition.");
260+
Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");
261261

262262
// First, try draining any data that already exists on the encoder instance. If we can't complete
263263
// that operation, there's no point to continuing down to the main workhorse methods.
@@ -523,7 +523,7 @@ private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int orig
523523
private unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar, EncoderNLS encoder)
524524
{
525525
Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
526-
Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Caller should've checked this condition.");
526+
Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");
527527
Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Caller should've checked this condition.");
528528

529529
// First, try draining any data that already exists on the encoder instance. If we can't complete
@@ -843,7 +843,7 @@ private protected unsafe int GetCharCountWithFallback(byte* pBytesOriginal, int
843843
private unsafe int GetCharCountWithFallback(byte* pOriginalBytes, int originalByteCount, int bytesConsumedSoFar, DecoderNLS decoder)
844844
{
845845
Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
846-
Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Caller should've checked this condition.");
846+
Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition.");
847847

848848
// First, try draining any data that already exists on the decoder instance. If we can't complete
849849
// that operation, there's no point to continuing down to the main workhorse methods.
@@ -1111,7 +1111,7 @@ private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int orig
11111111
private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar, DecoderNLS decoder)
11121112
{
11131113
Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
1114-
Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Caller should've checked this condition.");
1114+
Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition.");
11151115
Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Caller should've checked this condition.");
11161116

11171117
// First, try draining any data that already exists on the encoder instance. If we can't complete

0 commit comments

Comments
 (0)