Skip to content

Commit af08a7c

Browse files
authored
Merge pull request #1169 from adamhathcock/copilot/fix-zip-parsing-regression
Fix ZIP parsing failure on non-seekable streams with short reads
2 parents a9c28a7 + 8a3be35 commit af08a7c

File tree

3 files changed

+234
-26
lines changed

3 files changed

+234
-26
lines changed

src/SharpCompress/IO/SharpCompressStream.cs

Lines changed: 105 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,11 @@ public override int Read(byte[] buffer, int offset, int count)
206206
{
207207
ValidateBufferState();
208208

209-
// Fill buffer if needed
209+
// Fill buffer if needed, handling short reads from underlying stream
210210
if (_bufferedLength == 0)
211211
{
212-
_bufferedLength = Stream.Read(_buffer!, 0, _bufferSize);
213212
_bufferPosition = 0;
213+
_bufferedLength = FillBuffer(_buffer!, 0, _bufferSize);
214214
}
215215
int available = _bufferedLength - _bufferPosition;
216216
int toRead = Math.Min(count, available);
@@ -222,11 +222,8 @@ public override int Read(byte[] buffer, int offset, int count)
222222
return toRead;
223223
}
224224
// If buffer exhausted, refill
225-
int r = Stream.Read(_buffer!, 0, _bufferSize);
226-
if (r == 0)
227-
return 0;
228-
_bufferedLength = r;
229225
_bufferPosition = 0;
226+
_bufferedLength = FillBuffer(_buffer!, 0, _bufferSize);
230227
if (_bufferedLength == 0)
231228
{
232229
return 0;
@@ -250,6 +247,31 @@ public override int Read(byte[] buffer, int offset, int count)
250247
}
251248
}
252249

250+
/// <summary>
251+
/// Fills the buffer by reading from the underlying stream, handling short reads.
252+
/// Implements the ReadFully pattern: reads in a loop until buffer is full or EOF is reached.
253+
/// </summary>
254+
/// <param name="buffer">Buffer to fill</param>
255+
/// <param name="offset">Offset in buffer (always 0 in current usage)</param>
256+
/// <param name="count">Number of bytes to read</param>
257+
/// <returns>Total number of bytes read (may be less than count if EOF is reached)</returns>
258+
private int FillBuffer(byte[] buffer, int offset, int count)
259+
{
260+
// Implement ReadFully pattern but return the actual count read
261+
// This is the same logic as Utility.ReadFully but returns count instead of bool
262+
var total = 0;
263+
int read;
264+
while ((read = Stream.Read(buffer, offset + total, count - total)) > 0)
265+
{
266+
total += read;
267+
if (total >= count)
268+
{
269+
return total;
270+
}
271+
}
272+
return total;
273+
}
274+
253275
public override long Seek(long offset, SeekOrigin origin)
254276
{
255277
if (_bufferingEnabled)
@@ -324,13 +346,12 @@ CancellationToken cancellationToken
324346
{
325347
ValidateBufferState();
326348

327-
// Fill buffer if needed
349+
// Fill buffer if needed, handling short reads from underlying stream
328350
if (_bufferedLength == 0)
329351
{
330-
_bufferedLength = await Stream
331-
.ReadAsync(_buffer!, 0, _bufferSize, cancellationToken)
332-
.ConfigureAwait(false);
333352
_bufferPosition = 0;
353+
_bufferedLength = await FillBufferAsync(_buffer!, 0, _bufferSize, cancellationToken)
354+
.ConfigureAwait(false);
334355
}
335356
int available = _bufferedLength - _bufferPosition;
336357
int toRead = Math.Min(count, available);
@@ -342,13 +363,9 @@ CancellationToken cancellationToken
342363
return toRead;
343364
}
344365
// If buffer exhausted, refill
345-
int r = await Stream
346-
.ReadAsync(_buffer!, 0, _bufferSize, cancellationToken)
347-
.ConfigureAwait(false);
348-
if (r == 0)
349-
return 0;
350-
_bufferedLength = r;
351366
_bufferPosition = 0;
367+
_bufferedLength = await FillBufferAsync(_buffer!, 0, _bufferSize, cancellationToken)
368+
.ConfigureAwait(false);
352369
if (_bufferedLength == 0)
353370
{
354371
return 0;
@@ -369,6 +386,38 @@ CancellationToken cancellationToken
369386
}
370387
}
371388

389+
/// <summary>
390+
/// Async version of FillBuffer. Implements the ReadFullyAsync pattern.
391+
/// Reads in a loop until buffer is full or EOF is reached.
392+
/// </summary>
393+
private async Task<int> FillBufferAsync(
394+
byte[] buffer,
395+
int offset,
396+
int count,
397+
CancellationToken cancellationToken
398+
)
399+
{
400+
// Implement ReadFullyAsync pattern but return the actual count read
401+
// This is the same logic as Utility.ReadFullyAsync but returns count instead of bool
402+
var total = 0;
403+
int read;
404+
while (
405+
(
406+
read = await Stream
407+
.ReadAsync(buffer, offset + total, count - total, cancellationToken)
408+
.ConfigureAwait(false)
409+
) > 0
410+
)
411+
{
412+
total += read;
413+
if (total >= count)
414+
{
415+
return total;
416+
}
417+
}
418+
return total;
419+
}
420+
372421
public override async Task WriteAsync(
373422
byte[] buffer,
374423
int offset,
@@ -399,13 +448,15 @@ public override async ValueTask<int> ReadAsync(
399448
{
400449
ValidateBufferState();
401450

402-
// Fill buffer if needed
451+
// Fill buffer if needed, handling short reads from underlying stream
403452
if (_bufferedLength == 0)
404453
{
405-
_bufferedLength = await Stream
406-
.ReadAsync(_buffer.AsMemory(0, _bufferSize), cancellationToken)
407-
.ConfigureAwait(false);
408454
_bufferPosition = 0;
455+
_bufferedLength = await FillBufferMemoryAsync(
456+
_buffer.AsMemory(0, _bufferSize),
457+
cancellationToken
458+
)
459+
.ConfigureAwait(false);
409460
}
410461
int available = _bufferedLength - _bufferPosition;
411462
int toRead = Math.Min(buffer.Length, available);
@@ -417,13 +468,12 @@ public override async ValueTask<int> ReadAsync(
417468
return toRead;
418469
}
419470
// If buffer exhausted, refill
420-
int r = await Stream
421-
.ReadAsync(_buffer.AsMemory(0, _bufferSize), cancellationToken)
422-
.ConfigureAwait(false);
423-
if (r == 0)
424-
return 0;
425-
_bufferedLength = r;
426471
_bufferPosition = 0;
472+
_bufferedLength = await FillBufferMemoryAsync(
473+
_buffer.AsMemory(0, _bufferSize),
474+
cancellationToken
475+
)
476+
.ConfigureAwait(false);
427477
if (_bufferedLength == 0)
428478
{
429479
return 0;
@@ -442,6 +492,35 @@ public override async ValueTask<int> ReadAsync(
442492
}
443493
}
444494

495+
/// <summary>
496+
/// Async version of FillBuffer for Memory{byte}. Implements the ReadFullyAsync pattern.
497+
/// Reads in a loop until buffer is full or EOF is reached.
498+
/// </summary>
499+
private async ValueTask<int> FillBufferMemoryAsync(
500+
Memory<byte> buffer,
501+
CancellationToken cancellationToken
502+
)
503+
{
504+
// Implement ReadFullyAsync pattern but return the actual count read
505+
var total = 0;
506+
int read;
507+
while (
508+
(
509+
read = await Stream
510+
.ReadAsync(buffer.Slice(total), cancellationToken)
511+
.ConfigureAwait(false)
512+
) > 0
513+
)
514+
{
515+
total += read;
516+
if (total >= buffer.Length)
517+
{
518+
return total;
519+
}
520+
}
521+
return total;
522+
}
523+
445524
public override async ValueTask WriteAsync(
446525
ReadOnlyMemory<byte> buffer,
447526
CancellationToken cancellationToken = default
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using SharpCompress.Readers;
5+
using Xunit;
6+
7+
namespace SharpCompress.Test.Zip;
8+
9+
/// <summary>
10+
/// Tests for ZIP reading with streams that return short reads.
11+
/// Reproduces the regression where ZIP parsing fails depending on Stream.Read chunking patterns.
12+
/// </summary>
13+
public class ZipShortReadTests : ReaderTests
14+
{
15+
/// <summary>
16+
/// A non-seekable stream that returns controlled short reads.
17+
/// Simulates real-world network/multipart streams that legally return fewer bytes than requested.
18+
/// </summary>
19+
private sealed class PatternReadStream : Stream
20+
{
21+
private readonly MemoryStream _inner;
22+
private readonly int _firstReadSize;
23+
private readonly int _chunkSize;
24+
private bool _firstReadDone;
25+
26+
public PatternReadStream(byte[] bytes, int firstReadSize, int chunkSize)
27+
{
28+
_inner = new MemoryStream(bytes, writable: false);
29+
_firstReadSize = firstReadSize;
30+
_chunkSize = chunkSize;
31+
}
32+
33+
public override int Read(byte[] buffer, int offset, int count)
34+
{
35+
int limit = !_firstReadDone ? _firstReadSize : _chunkSize;
36+
_firstReadDone = true;
37+
38+
int toRead = Math.Min(count, limit);
39+
return _inner.Read(buffer, offset, toRead);
40+
}
41+
42+
public override bool CanRead => true;
43+
public override bool CanSeek => false;
44+
public override bool CanWrite => false;
45+
public override long Length => throw new NotSupportedException();
46+
public override long Position
47+
{
48+
get => throw new NotSupportedException();
49+
set => throw new NotSupportedException();
50+
}
51+
52+
public override void Flush() => throw new NotSupportedException();
53+
54+
public override long Seek(long offset, SeekOrigin origin) =>
55+
throw new NotSupportedException();
56+
57+
public override void SetLength(long value) => throw new NotSupportedException();
58+
59+
public override void Write(byte[] buffer, int offset, int count) =>
60+
throw new NotSupportedException();
61+
}
62+
63+
/// <summary>
64+
/// Test that ZIP reading works correctly with short reads on non-seekable streams.
65+
/// Uses a test archive and different chunking patterns.
66+
/// </summary>
67+
[Theory]
68+
[InlineData("Zip.deflate.zip", 1000, 4096)]
69+
[InlineData("Zip.deflate.zip", 999, 4096)]
70+
[InlineData("Zip.deflate.zip", 100, 4096)]
71+
[InlineData("Zip.deflate.zip", 50, 512)]
72+
[InlineData("Zip.deflate.zip", 1, 1)] // Extreme case: 1 byte at a time
73+
[InlineData("Zip.deflate.dd.zip", 1000, 4096)]
74+
[InlineData("Zip.deflate.dd.zip", 999, 4096)]
75+
[InlineData("Zip.zip64.zip", 3816, 4096)]
76+
[InlineData("Zip.zip64.zip", 3815, 4096)] // Similar to the issue pattern
77+
public void Zip_Reader_Handles_Short_Reads(string zipFile, int firstReadSize, int chunkSize)
78+
{
79+
// Use an existing test ZIP file
80+
var zipPath = Path.Combine(TEST_ARCHIVES_PATH, zipFile);
81+
if (!File.Exists(zipPath))
82+
{
83+
return; // Skip if file doesn't exist
84+
}
85+
86+
var bytes = File.ReadAllBytes(zipPath);
87+
88+
// Baseline with MemoryStream (seekable, no short reads)
89+
var baseline = ReadEntriesFromStream(new MemoryStream(bytes, writable: false));
90+
Assert.NotEmpty(baseline);
91+
92+
// Non-seekable stream with controlled short read pattern
93+
var chunked = ReadEntriesFromStream(new PatternReadStream(bytes, firstReadSize, chunkSize));
94+
Assert.Equal(baseline, chunked);
95+
}
96+
97+
private List<string> ReadEntriesFromStream(Stream stream)
98+
{
99+
var names = new List<string>();
100+
using var reader = ReaderFactory.Open(stream, new ReaderOptions { LeaveStreamOpen = true });
101+
102+
while (reader.MoveToNextEntry())
103+
{
104+
if (reader.Entry.IsDirectory)
105+
{
106+
continue;
107+
}
108+
109+
names.Add(reader.Entry.Key!);
110+
111+
using var entryStream = reader.OpenEntryStream();
112+
entryStream.CopyTo(Stream.Null);
113+
}
114+
115+
return names;
116+
}
117+
}

tests/SharpCompress.Test/packages.lock.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929
"Microsoft.NETFramework.ReferenceAssemblies.net48": "1.0.3"
3030
}
3131
},
32+
"Mono.Posix.NETStandard": {
33+
"type": "Direct",
34+
"requested": "[1.0.0, )",
35+
"resolved": "1.0.0",
36+
"contentHash": "vSN/L1uaVwKsiLa95bYu2SGkF0iY3xMblTfxc8alSziPuVfJpj3geVqHGAA75J7cZkMuKpFVikz82Lo6y6LLdA=="
37+
},
3238
"xunit": {
3339
"type": "Direct",
3440
"requested": "[2.9.3, )",
@@ -216,6 +222,12 @@
216222
"Microsoft.NETFramework.ReferenceAssemblies.net461": "1.0.3"
217223
}
218224
},
225+
"Mono.Posix.NETStandard": {
226+
"type": "Direct",
227+
"requested": "[1.0.0, )",
228+
"resolved": "1.0.0",
229+
"contentHash": "vSN/L1uaVwKsiLa95bYu2SGkF0iY3xMblTfxc8alSziPuVfJpj3geVqHGAA75J7cZkMuKpFVikz82Lo6y6LLdA=="
230+
},
219231
"xunit": {
220232
"type": "Direct",
221233
"requested": "[2.9.3, )",

0 commit comments

Comments
 (0)