Skip to content

Commit 9a7bdd3

Browse files
authored
Merge pull request #1172 from adamhathcock/copilot/fix-sevenzip-contiguous-streams
Fix SevenZipReader to maintain contiguous stream state for solid archives
2 parents af08a7c + 484bc74 commit 9a7bdd3

File tree

2 files changed

+162
-12
lines changed

2 files changed

+162
-12
lines changed

src/SharpCompress/Archives/SevenZip/SevenZipArchive.cs

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,31 @@ protected override IReader CreateReaderForSolidExtraction() =>
212212
public override long TotalSize =>
213213
_database?._packSizes.Aggregate(0L, (total, packSize) => total + packSize) ?? 0;
214214

215-
private sealed class SevenZipReader : AbstractReader<SevenZipEntry, SevenZipVolume>
215+
internal sealed class SevenZipReader : AbstractReader<SevenZipEntry, SevenZipVolume>
216216
{
217217
private readonly SevenZipArchive _archive;
218218
private SevenZipEntry? _currentEntry;
219+
private Stream? _currentFolderStream;
220+
private CFolder? _currentFolder;
221+
222+
/// <summary>
223+
/// Enables internal diagnostics for tests.
224+
/// When disabled (default), diagnostics properties return null to avoid exposing internal state.
225+
/// </summary>
226+
internal bool DiagnosticsEnabled { get; set; }
227+
228+
/// <summary>
229+
/// Current folder instance used to decide whether the solid folder stream should be reused.
230+
/// Only available when <see cref="DiagnosticsEnabled"/> is true.
231+
/// </summary>
232+
internal object? DiagnosticsCurrentFolder => DiagnosticsEnabled ? _currentFolder : null;
233+
234+
/// <summary>
235+
/// Current shared folder stream instance.
236+
/// Only available when <see cref="DiagnosticsEnabled"/> is true.
237+
/// </summary>
238+
internal Stream? DiagnosticsCurrentFolderStream =>
239+
DiagnosticsEnabled ? _currentFolderStream : null;
219240

220241
internal SevenZipReader(ReaderOptions readerOptions, SevenZipArchive archive)
221242
: base(readerOptions, ArchiveType.SevenZip) => this._archive = archive;
@@ -231,9 +252,10 @@ protected override IEnumerable<SevenZipEntry> GetEntries(Stream stream)
231252
_currentEntry = dir;
232253
yield return dir;
233254
}
234-
// For non-directory entries, yield them without creating shared streams
235-
// Each call to GetEntryStream() will create a fresh decompression stream
236-
// to avoid state corruption issues with async operations
255+
// For solid archives (entries in the same folder share a compressed stream),
256+
// we must iterate entries sequentially and maintain the folder stream state
257+
// across entries in the same folder to avoid recreating the decompression
258+
// stream for each file, which breaks contiguous streaming.
237259
foreach (var entry in entries.Where(x => !x.IsDirectory))
238260
{
239261
_currentEntry = entry;
@@ -243,19 +265,53 @@ protected override IEnumerable<SevenZipEntry> GetEntries(Stream stream)
243265

244266
protected override EntryStream GetEntryStream()
245267
{
246-
// Create a fresh decompression stream for each file (no state sharing).
247-
// However, the LZMA decoder has bugs in its async implementation that cause
248-
// state corruption even on fresh streams. The SyncOnlyStream wrapper
249-
// works around these bugs by forcing async operations to use sync equivalents.
250-
//
251-
// TODO: Fix the LZMA decoder async bugs (in LzmaStream, Decoder, OutWindow)
252-
// so this wrapper is no longer necessary.
253268
var entry = _currentEntry.NotNull("currentEntry is not null");
254269
if (entry.IsDirectory)
255270
{
256271
return CreateEntryStream(Stream.Null);
257272
}
258-
return CreateEntryStream(new SyncOnlyStream(entry.FilePart.GetCompressedStream()));
273+
274+
var filePart = (SevenZipFilePart)entry.FilePart;
275+
if (!filePart.Header.HasStream)
276+
{
277+
// Entries with no underlying stream (e.g., empty files or anti-items)
278+
// should return an empty stream, matching previous behavior.
279+
return CreateEntryStream(Stream.Null);
280+
}
281+
282+
var folder = filePart.Folder;
283+
// Check if we're starting a new folder - dispose old folder stream if needed
284+
if (folder != _currentFolder)
285+
{
286+
_currentFolderStream?.Dispose();
287+
_currentFolderStream = null;
288+
_currentFolder = folder;
289+
}
290+
291+
// Create the folder stream once per folder
292+
if (_currentFolderStream is null)
293+
{
294+
_currentFolderStream = _archive._database!.GetFolderStream(
295+
_archive.Volumes.Single().Stream,
296+
folder!,
297+
_archive._database.PasswordProvider
298+
);
299+
}
300+
301+
// Wrap with SyncOnlyStream to work around LZMA async bugs
302+
// Return a ReadOnlySubStream that reads from the shared folder stream
303+
return CreateEntryStream(
304+
new SyncOnlyStream(
305+
new ReadOnlySubStream(_currentFolderStream, entry.Size, leaveOpen: true)
306+
)
307+
);
308+
}
309+
310+
public override void Dispose()
311+
{
312+
_currentFolderStream?.Dispose();
313+
_currentFolderStream = null;
314+
base.Dispose();
259315
}
260316
}
261317

tests/SharpCompress.Test/SevenZip/SevenZipArchiveTests.cs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,4 +251,98 @@ public void SevenZipArchive_TestSolidDetection()
251251
);
252252
Assert.False(nonSolidArchive.IsSolid);
253253
}
254+
255+
[Fact]
256+
public void SevenZipArchive_Solid_ExtractAllEntries_Contiguous()
257+
{
258+
// This test verifies that solid archives iterate entries as contiguous streams
259+
// rather than recreating the decompression stream for each entry
260+
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.solid.7z");
261+
using var archive = SevenZipArchive.Open(testArchive);
262+
Assert.True(archive.IsSolid);
263+
264+
using var reader = archive.ExtractAllEntries();
265+
while (reader.MoveToNextEntry())
266+
{
267+
if (!reader.Entry.IsDirectory)
268+
{
269+
reader.WriteEntryToDirectory(
270+
SCRATCH_FILES_PATH,
271+
new ExtractionOptions { ExtractFullPath = true, Overwrite = true }
272+
);
273+
}
274+
}
275+
276+
VerifyFiles();
277+
}
278+
279+
[Fact]
280+
public void SevenZipArchive_Solid_VerifyStreamReuse()
281+
{
282+
// This test verifies that the folder stream is reused within each folder
283+
// and not recreated for each entry in solid archives
284+
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.solid.7z");
285+
using var archive = SevenZipArchive.Open(testArchive);
286+
Assert.True(archive.IsSolid);
287+
288+
using var reader = archive.ExtractAllEntries();
289+
290+
var sevenZipReader = Assert.IsType<SevenZipArchive.SevenZipReader>(reader);
291+
sevenZipReader.DiagnosticsEnabled = true;
292+
293+
Stream? currentFolderStreamInstance = null;
294+
object? currentFolder = null;
295+
var entryCount = 0;
296+
var entriesInCurrentFolder = 0;
297+
var streamRecreationsWithinFolder = 0;
298+
299+
while (reader.MoveToNextEntry())
300+
{
301+
if (!reader.Entry.IsDirectory)
302+
{
303+
// Extract the entry to trigger GetEntryStream
304+
using var entryStream = reader.OpenEntryStream();
305+
var buffer = new byte[4096];
306+
while (entryStream.Read(buffer, 0, buffer.Length) > 0)
307+
{
308+
// Read the stream to completion
309+
}
310+
311+
entryCount++;
312+
313+
var folderStream = sevenZipReader.DiagnosticsCurrentFolderStream;
314+
var folder = sevenZipReader.DiagnosticsCurrentFolder;
315+
316+
Assert.NotNull(folderStream); // Folder stream should exist
317+
318+
// Check if we're in a new folder
319+
if (currentFolder == null || !ReferenceEquals(currentFolder, folder))
320+
{
321+
// Starting a new folder
322+
currentFolder = folder;
323+
currentFolderStreamInstance = folderStream;
324+
entriesInCurrentFolder = 1;
325+
}
326+
else
327+
{
328+
// Same folder - verify stream wasn't recreated
329+
entriesInCurrentFolder++;
330+
331+
if (!ReferenceEquals(currentFolderStreamInstance, folderStream))
332+
{
333+
// Stream was recreated within the same folder - this is the bug we're testing for!
334+
streamRecreationsWithinFolder++;
335+
}
336+
337+
currentFolderStreamInstance = folderStream;
338+
}
339+
}
340+
}
341+
342+
// Verify we actually tested multiple entries
343+
Assert.True(entryCount > 1, "Test should have multiple entries to verify stream reuse");
344+
345+
// The critical check: within a single folder, the stream should NEVER be recreated
346+
Assert.Equal(0, streamRecreationsWithinFolder); // Folder stream should remain the same for all entries in the same folder
347+
}
254348
}

0 commit comments

Comments
 (0)