Skip to content

Commit 0fe51bd

Browse files
Remove Boyer-Moore in favor of IndexOf (#51815)
1 parent 8669c7e commit 0fe51bd

File tree

3 files changed

+88
-106
lines changed

3 files changed

+88
-106
lines changed

src/Http/WebUtilities/src/MultipartBoundary.cs

Lines changed: 6 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,61 +7,26 @@ namespace Microsoft.AspNetCore.WebUtilities;
77

88
internal sealed class MultipartBoundary
99
{
10-
private readonly int[] _skipTable = new int[256];
11-
private readonly string _boundary;
10+
private readonly byte[] _boundaryBytes;
1211
private bool _expectLeadingCrlf;
1312

1413
public MultipartBoundary(string boundary, bool expectLeadingCrlf = true)
1514
{
1615
ArgumentNullException.ThrowIfNull(boundary);
1716

18-
_boundary = boundary;
1917
_expectLeadingCrlf = expectLeadingCrlf;
20-
Initialize(_boundary, _expectLeadingCrlf);
21-
}
18+
_boundaryBytes = Encoding.UTF8.GetBytes("\r\n--" + boundary);
2219

23-
private void Initialize(string boundary, bool expectLeadingCrlf)
24-
{
25-
if (expectLeadingCrlf)
26-
{
27-
BoundaryBytes = Encoding.UTF8.GetBytes("\r\n--" + boundary);
28-
}
29-
else
30-
{
31-
BoundaryBytes = Encoding.UTF8.GetBytes("--" + boundary);
32-
}
3320
FinalBoundaryLength = BoundaryBytes.Length + 2; // Include the final '--' terminator.
34-
35-
var length = BoundaryBytes.Length;
36-
for (var i = 0; i < _skipTable.Length; ++i)
37-
{
38-
_skipTable[i] = length;
39-
}
40-
for (var i = 0; i < length; ++i)
41-
{
42-
_skipTable[BoundaryBytes[i]] = Math.Max(1, length - 1 - i);
43-
}
44-
}
45-
46-
public int GetSkipValue(byte input)
47-
{
48-
return _skipTable[input];
4921
}
5022

51-
public bool ExpectLeadingCrlf
23+
public void ExpectLeadingCrlf()
5224
{
53-
get { return _expectLeadingCrlf; }
54-
set
55-
{
56-
if (value != _expectLeadingCrlf)
57-
{
58-
_expectLeadingCrlf = value;
59-
Initialize(_boundary, _expectLeadingCrlf);
60-
}
61-
}
25+
_expectLeadingCrlf = true;
6226
}
6327

64-
public byte[] BoundaryBytes { get; private set; } = default!; // This gets initialized as part of Initialize called from in the ctor.
28+
// Returns "\r\n--{boundary}" when a leading CRLF is expected (i.e. we're looking for the end of a section), otherwise "--{boundary}"
29+
public ReadOnlySpan<byte> BoundaryBytes => _boundaryBytes.AsSpan(_expectLeadingCrlf ? 0 : 2);
6530

6631
public int FinalBoundaryLength { get; private set; }
6732
}

src/Http/WebUtilities/src/MultipartReader.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public MultipartReader(string boundary, Stream stream, int bufferSize)
9696
return null;
9797
}
9898
var headers = await ReadHeadersAsync(cancellationToken);
99-
_boundary.ExpectLeadingCrlf = true;
99+
_boundary.ExpectLeadingCrlf();
100100
_currentStream = new MultipartReaderStream(_stream, _boundary) { LengthLimit = BodyLengthLimit };
101101
long? baseStreamOffset = _stream.CanSeek ? (long?)_stream.Position : null;
102102
return new MultipartSection() { Headers = headers, Body = _currentStream, BaseStreamOffset = baseStreamOffset };

src/Http/WebUtilities/src/MultipartReaderStream.cs

Lines changed: 81 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,27 @@ public override int Read(byte[] buffer, int offset, int count)
167167
}
168168
var bufferedData = _innerStream.BufferedData;
169169

170-
// scan for a boundary match, full or partial.
170+
var index = bufferedData.AsSpan().IndexOf(_boundary.BoundaryBytes);
171+
if (index >= 0)
172+
{
173+
// There is data before the boundary, we should return it to the user
174+
if (index != 0)
175+
{
176+
// Sync, it's already buffered
177+
var slice = buffer.AsSpan(0, Math.Min(buffer.Length, index));
178+
179+
var readAmount = _innerStream.Read(slice);
180+
return UpdatePosition(readAmount);
181+
}
182+
else
183+
{
184+
var length = _boundary.BoundaryBytes.Length;
185+
186+
return ReadBoundary(this, length);
187+
}
188+
}
189+
190+
// scan for a partial boundary match.
171191
int read;
172192
if (SubMatch(bufferedData, _boundary.BoundaryBytes, out var matchOffset, out var matchCount))
173193
{
@@ -181,28 +201,33 @@ public override int Read(byte[] buffer, int offset, int count)
181201
var length = _boundary.BoundaryBytes.Length;
182202
Debug.Assert(matchCount == length);
183203

204+
return ReadBoundary(this, length);
205+
}
206+
207+
// No possible boundary match within the buffered data, return the data from the buffer.
208+
read = _innerStream.Read(buffer, offset, Math.Min(count, bufferedData.Count));
209+
return UpdatePosition(read);
210+
211+
static int ReadBoundary(MultipartReaderStream stream, int length)
212+
{
184213
// "The boundary may be followed by zero or more characters of
185214
// linear whitespace. It is then terminated by either another CRLF"
186215
// or -- for the final boundary.
187-
var boundary = _bytePool.Rent(length);
188-
read = _innerStream.Read(boundary, 0, length);
189-
_bytePool.Return(boundary);
216+
var boundary = stream._bytePool.Rent(length);
217+
var read = stream._innerStream.Read(boundary, 0, length);
218+
stream._bytePool.Return(boundary);
190219
Debug.Assert(read == length); // It should have all been buffered
191220

192-
var remainder = _innerStream.ReadLine(lengthLimit: 100); // Whitespace may exceed the buffer.
221+
var remainder = stream._innerStream.ReadLine(lengthLimit: 100).AsSpan(); // Whitespace may exceed the buffer.
193222
remainder = remainder.Trim();
194-
if (string.Equals("--", remainder, StringComparison.Ordinal))
223+
if (remainder.Equals("--", StringComparison.Ordinal))
195224
{
196-
FinalBoundaryFound = true;
225+
stream.FinalBoundaryFound = true;
197226
}
198-
Debug.Assert(FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);
199-
_finished = true;
227+
Debug.Assert(stream.FinalBoundaryFound || remainder.IsEmpty, "Un-expected data found on the boundary line: " + remainder.ToString());
228+
stream._finished = true;
200229
return 0;
201230
}
202-
203-
// No possible boundary match within the buffered data, return the data from the buffer.
204-
read = _innerStream.Read(buffer, offset, Math.Min(count, bufferedData.Count));
205-
return UpdatePosition(read);
206231
}
207232

208233
public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
@@ -222,6 +247,27 @@ public override async ValueTask<int> ReadAsync(Memory<byte> buffer, Cancellation
222247
}
223248
var bufferedData = _innerStream.BufferedData;
224249

250+
var index = bufferedData.AsSpan().IndexOf(_boundary.BoundaryBytes);
251+
252+
if (index >= 0)
253+
{
254+
// There is data before the boundary, we should return it to the user
255+
if (index != 0)
256+
{
257+
var slice = buffer[..Math.Min(buffer.Length, index)];
258+
259+
// Sync, it's already buffered
260+
var readAmount = _innerStream.Read(slice.Span);
261+
return UpdatePosition(readAmount);
262+
}
263+
else
264+
{
265+
var length = _boundary.BoundaryBytes.Length;
266+
267+
return await ReadBoundaryAsync(this, length, cancellationToken);
268+
}
269+
}
270+
225271
// scan for a partial boundary match (a full match was already handled by IndexOf above).
226272
int matchOffset;
227273
int matchCount;
@@ -231,70 +277,52 @@ public override async ValueTask<int> ReadAsync(Memory<byte> buffer, Cancellation
231277
// We found a possible match, return any data before it.
232278
if (matchOffset > bufferedData.Offset)
233279
{
234-
// Sync, it's already buffered
235280
var slice = buffer[..Math.Min(buffer.Length, matchOffset - bufferedData.Offset)];
236281

282+
// Sync, it's already buffered
237283
read = _innerStream.Read(slice.Span);
238284
return UpdatePosition(read);
239285
}
240286

241-
var length = _boundary.BoundaryBytes!.Length;
287+
var length = _boundary.BoundaryBytes.Length;
242288
Debug.Assert(matchCount == length);
243289

290+
return await ReadBoundaryAsync(this, length, cancellationToken);
291+
}
292+
293+
// No possible boundary match within the buffered data, return the data from the buffer.
294+
read = _innerStream.Read(buffer.Span[..Math.Min(buffer.Length, bufferedData.Count)]);
295+
return UpdatePosition(read);
296+
297+
static async Task<int> ReadBoundaryAsync(MultipartReaderStream stream, int length, CancellationToken cancellationToken)
298+
{
244299
// "The boundary may be followed by zero or more characters of
245300
// linear whitespace. It is then terminated by either another CRLF"
246301
// or -- for the final boundary.
247-
var boundary = _bytePool.Rent(length);
248-
read = _innerStream.Read(boundary, 0, length);
249-
_bytePool.Return(boundary);
302+
var boundary = stream._bytePool.Rent(length);
303+
var read = stream._innerStream.Read(boundary, 0, length);
304+
stream._bytePool.Return(boundary);
250305
Debug.Assert(read == length); // It should have all been buffered
251306

252-
var remainder = await _innerStream.ReadLineAsync(lengthLimit: 100, cancellationToken: cancellationToken); // Whitespace may exceed the buffer.
307+
var remainder = await stream._innerStream.ReadLineAsync(lengthLimit: 100, cancellationToken: cancellationToken); // Whitespace may exceed the buffer.
253308
remainder = remainder.Trim();
254309
if (string.Equals("--", remainder, StringComparison.Ordinal))
255310
{
256-
FinalBoundaryFound = true;
311+
stream.FinalBoundaryFound = true;
257312
}
258-
Debug.Assert(FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);
313+
Debug.Assert(stream.FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);
259314

260-
_finished = true;
315+
stream._finished = true;
261316
return 0;
262317
}
263-
264-
// No possible boundary match within the buffered data, return the data from the buffer.
265-
read = _innerStream.Read(buffer.Span[..Math.Min(buffer.Length, bufferedData.Count)]);
266-
return UpdatePosition(read);
267318
}
268319

269-
// Does segment1 contain all of matchBytes, or does it end with the start of matchBytes?
270-
// 1: AAAAABBBBBCCCCC
271-
// 2: BBBBB
272-
// Or:
320+
// Does segment1 end with the start of matchBytes?
273321
// 1: AAAAABBB
274322
// 2: BBBBB
275-
private bool SubMatch(ArraySegment<byte> segment1, byte[] matchBytes, out int matchOffset, out int matchCount)
323+
private static bool SubMatch(ArraySegment<byte> segment1, ReadOnlySpan<byte> matchBytes, out int matchOffset, out int matchCount)
276324
{
277-
// case 1: does segment1 fully contain matchBytes?
278-
{
279-
var matchBytesLengthMinusOne = matchBytes.Length - 1;
280-
var matchBytesLastByte = matchBytes[matchBytesLengthMinusOne];
281-
var segmentEndMinusMatchBytesLength = segment1.Offset + segment1.Count - matchBytes.Length;
282-
283-
matchOffset = segment1.Offset;
284-
while (matchOffset < segmentEndMinusMatchBytesLength)
285-
{
286-
var lookaheadTailChar = segment1.Array![matchOffset + matchBytesLengthMinusOne];
287-
if (lookaheadTailChar == matchBytesLastByte &&
288-
CompareBuffers(segment1.Array, matchOffset, matchBytes, 0, matchBytesLengthMinusOne) == 0)
289-
{
290-
matchCount = matchBytes.Length;
291-
return true;
292-
}
293-
matchOffset += _boundary.GetSkipValue(lookaheadTailChar);
294-
}
295-
}
296-
297-
// case 2: does segment1 end with the start of matchBytes?
325+
matchOffset = Math.Max(segment1.Offset, segment1.Offset + segment1.Count - matchBytes.Length);
298326
var segmentEnd = segment1.Offset + segment1.Count;
299327

300328
// clear matchCount to zero
@@ -315,19 +343,8 @@ private bool SubMatch(ArraySegment<byte> segment1, byte[] matchBytes, out int ma
315343
break;
316344
}
317345
}
318-
return matchCount > 0;
319-
}
320346

321-
private static int CompareBuffers(byte[] buffer1, int offset1, byte[] buffer2, int offset2, int count)
322-
{
323-
for (; count-- > 0; offset1++, offset2++)
324-
{
325-
if (buffer1[offset1] != buffer2[offset2])
326-
{
327-
return buffer1[offset1] - buffer2[offset2];
328-
}
329-
}
330-
return 0;
347+
return matchCount > 0;
331348
}
332349

333350
public override void CopyTo(Stream destination, int bufferSize)

0 commit comments

Comments
 (0)