Skip to content

Commit 65a86d8

Browse files
Fix mszip relying on exceptions (#52)
* Better fix * initial * Fixed * Move function to proper location, clear memory better * work so far * more * more work * No more per file seeking * Finished work for now. * First round of fixes. * Return if array is empty too
1 parent d7131df commit 65a86d8

File tree

3 files changed

+184
-8
lines changed

3 files changed

+184
-8
lines changed

SabreTools.Serialization/Readers/MicrosoftCabinet.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,13 @@ private static CFDATA ParseDataBlock(Stream data, byte dataReservedSize)
203203
dataBlock.Checksum = data.ReadUInt32LittleEndian();
204204
dataBlock.CompressedSize = data.ReadUInt16LittleEndian();
205205
dataBlock.UncompressedSize = data.ReadUInt16LittleEndian();
206-
206+
207207
if (dataReservedSize > 0)
208-
dataBlock.ReservedData = data.ReadBytes(dataReservedSize);
208+
data.SeekIfPossible(dataReservedSize, SeekOrigin.Current);
209209

210210
if (dataBlock.CompressedSize > 0)
211-
dataBlock.CompressedData = data.ReadBytes(dataBlock.CompressedSize);
212-
211+
data.SeekIfPossible(dataBlock.CompressedSize, SeekOrigin.Current);
212+
213213
return dataBlock;
214214
}
215215

SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.IO;
34
using SabreTools.Data.Models.MicrosoftCabinet;
45
using SabreTools.IO.Extensions;
@@ -174,12 +175,22 @@ public bool Extract(string outputDirectory, bool includeDebug)
174175
// Loop through the current folders
175176
for (int f = 0; f < cabinet.Folders.Length; f++)
176177
{
178+
if (f == 0 && (cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT
179+
|| cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_FROM_PREV))
180+
continue;
181+
177182
var folder = cabinet.Folders[f];
178183
allExtracted &= cabinet.ExtractFolder(Filename, outputDirectory, folder, f, ignorePrev, includeDebug);
179184
}
180185

181186
// Move to the next cabinet, if possible
187+
Array.ForEach(cabinet.Folders, folder => folder.DataBlocks = []);
188+
182189
cabinet = cabinet.Next;
190+
cabinet?.Prev = null;
191+
192+
// TODO: already-extracted data isn't being cleared from memory, at least not nearly enough.
193+
183194
if (cabinet?.Folders == null || cabinet.Folders.Length == 0)
184195
break;
185196
}
@@ -192,7 +203,7 @@ public bool Extract(string outputDirectory, bool includeDebug)
192203
return false;
193204
}
194205
}
195-
206+
196207
/// <summary>
197208
/// Extract the contents of a single folder
198209
/// </summary>
@@ -217,15 +228,77 @@ private bool ExtractFolder(string? filename,
217228

218229
// Loop through the files
219230
bool allExtracted = true;
220-
var files = GetFiles(folderIndex, ignorePrev);
231+
var filterFiles = GetSpannedFiles(filename, folderIndex, ignorePrev);
232+
List<CFFILE> fileList = [];
233+
234+
// Filtering, add debug output eventually
235+
for (int i = 0; i < filterFiles.Length; i++)
236+
{
237+
var file = filterFiles[i];
238+
239+
if (file.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT ||
240+
file.FolderIndex == FolderIndex.CONTINUED_FROM_PREV)
241+
{
242+
// debug output for inconsistencies would go here
243+
continue;
244+
}
245+
246+
fileList.Add(file);
247+
}
248+
249+
CFFILE[] files = fileList.ToArray();
250+
blockStream.SeekIfPossible(0, SeekOrigin.Begin);
221251
for (int i = 0; i < files.Length; i++)
222252
{
223253
var file = files[i];
224-
allExtracted &= ExtractFile(outputDirectory, blockStream, file, includeDebug);
254+
255+
allExtracted &= ExtractFiles(outputDirectory, blockStream, file, includeDebug);
225256
}
226257

227258
return allExtracted;
228259
}
260+
261+
// TODO: this will apparently improve memory usage/performance, but it's not clear if this implementation is enough for that to happen
/// <summary>
/// Extract the contents of a single file, intended to be used with all files in a straight shot
/// </summary>
/// <remarks>
/// The file data is always read from <paramref name="blockStream"/> before the output path is
/// validated, so an entry that gets rejected does not shift the read position for the entries
/// that follow it. Entries whose resolved path falls outside of <paramref name="outputDirectory"/>
/// (rooted names or names containing ".." segments) are not written and count as a failure.
/// </remarks>
/// <param name="outputDirectory">Path to the output directory</param>
/// <param name="blockStream">Stream representing the uncompressed block data</param>
/// <param name="file">File information</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <returns>True if the file extracted, false otherwise</returns>
private static bool ExtractFiles(string outputDirectory, Stream blockStream, CFFILE file, bool includeDebug)
{
    try
    {
        // Consume the data first so the stream stays aligned even if the entry is rejected below
        byte[] fileData = blockStream.ReadBytes((int)file.FileSize);

        // Ensure directory separators are consistent
        string filename = file.Name;
        if (Path.DirectorySeparatorChar == '\\')
            filename = filename.Replace('/', '\\');
        else if (Path.DirectorySeparatorChar == '/')
            filename = filename.Replace('\\', '/');

        filename = Path.Combine(outputDirectory, filename);

        // The name comes from the archive, so refuse anything that escapes the output directory
        if (!IsPathWithinDirectory(outputDirectory, filename))
        {
            if (includeDebug) Console.Error.WriteLine($"Skipping '{file.Name}': resolved path is outside of the output directory");
            return false;
        }

        // Ensure the full output directory exists
        var directoryName = Path.GetDirectoryName(filename);
        if (directoryName != null && !Directory.Exists(directoryName))
            Directory.CreateDirectory(directoryName);

        // Open the output file for writing
        using var fs = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None);
        fs.Write(fileData, 0, fileData.Length);
        fs.Flush();
    }
    catch (Exception ex)
    {
        if (includeDebug) Console.Error.WriteLine(ex);
        return false;
    }

    return true;
}

/// <summary>
/// Determine if a candidate path resolves to a location inside of a root directory
/// </summary>
/// <param name="rootDirectory">Directory that must contain the candidate; empty means the current directory</param>
/// <param name="candidatePath">Path to validate</param>
/// <returns>True if the fully-resolved candidate is located under the fully-resolved root, false otherwise</returns>
private static bool IsPathWithinDirectory(string rootDirectory, string candidatePath)
{
    // Path.Combine treats an empty root as "relative to the current directory", so mirror that here
    string fullRoot = Path.GetFullPath(string.IsNullOrEmpty(rootDirectory) ? "." : rootDirectory);
    string fullCandidate = Path.GetFullPath(candidatePath);

    // Compare against "root + separator" so that "/out-evil" is not accepted for a root of "/out"
    string separator = Path.DirectorySeparatorChar.ToString();
    if (!fullRoot.EndsWith(separator, StringComparison.Ordinal))
        fullRoot += separator;

    return fullCandidate.StartsWith(fullRoot, StringComparison.Ordinal);
}
229302

230303
/// <summary>
231304
/// Extract the contents of a single file

SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.IO;
33
using SabreTools.Data.Models.MicrosoftCabinet;
44
using SabreTools.IO.Compression.MSZIP;
5+
using SabreTools.IO.Extensions;
56

67
namespace SabreTools.Serialization.Wrappers
78
{
@@ -315,6 +316,8 @@ private static CompressionType GetCompressionType(CFFOLDER folder)
315316
if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0)
316317
return null;
317318

319+
GetData(folder);
320+
318321
// Get all files for the folder
319322
var files = GetFiles(folderIndex);
320323
if (files.Length == 0)
@@ -335,7 +338,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder)
335338
// Get all blocks from Prev
336339
if (Prev?.Header != null && Prev.Folders != null)
337340
{
338-
int prevFolderIndex = Prev.FolderCount;
341+
int prevFolderIndex = Prev.FolderCount - 1;
339342
var prevFolder = Prev.Folders[prevFolderIndex - 1];
340343
prevBlocks = Prev.GetDataBlocks(filename, prevFolder, prevFolderIndex, skipNext: true) ?? [];
341344
}
@@ -360,6 +363,106 @@ private static CompressionType GetCompressionType(CFFOLDER folder)
360363
// Return all found blocks in order
361364
return [.. prevBlocks, .. folder.DataBlocks, .. nextBlocks];
362365
}
366+
367+
/// <summary>
/// Loads in all the datablocks for the current folder.
/// </summary>
/// <remarks>
/// Walks the data blocks starting at the folder's <c>CabStartOffset</c> and fills in the
/// <c>ReservedData</c> and <c>CompressedData</c> of each already-parsed block by reading the
/// matching range from the source. Nothing is loaded if the folder has no start offset or
/// no parsed blocks. At most <c>DataBlocks.Length</c> blocks are visited, even when the
/// folder's <c>DataCount</c> claims more.
/// </remarks>
/// <param name="folder">The folder to have the datablocks loaded for</param>
public void GetData(CFFOLDER folder)
{
    // Fixed-size fields at the start of every data block: checksum (4) + compressed size (2) + uncompressed size (2)
    const uint headerSize = 8;

    if (folder.CabStartOffset == 0)
        return;

    // Nothing to fill in if the block headers were never parsed
    var blocks = folder.DataBlocks;
    if (blocks == null || blocks.Length == 0)
        return;

    uint offset = folder.CabStartOffset;

    // Bound by both counts so a header that overstates DataCount cannot index past the array
    for (int i = 0; i < folder.DataCount && i < blocks.Length; i++)
    {
        // Skip over the fixed header fields, which were already parsed
        offset += headerSize;

        if (Header.DataReservedSize > 0)
        {
            blocks[i].ReservedData = ReadRangeFromSource(offset, Header.DataReservedSize);
            offset += Header.DataReservedSize;
        }

        if (blocks[i].CompressedSize > 0)
        {
            blocks[i].CompressedData = ReadRangeFromSource(offset, blocks[i].CompressedSize);
            offset += blocks[i].CompressedSize;
        }
    }
}
395+
396+
/// <summary>
/// Get all files for the current folder, plus connected spanned folders.
/// </summary>
/// <param name="filename">Filename passed to OpenPrevious/OpenNext when a neighboring cabinet has not been opened yet</param>
/// <param name="folderIndex">Index of the folder in the cabinet</param>
/// <param name="ignorePrev">True to ignore previous links, false otherwise</param>
/// <param name="skipPrev">True to not collect files from the previous cabinet, false otherwise</param>
/// <param name="skipNext">True to not collect files from the next cabinet, false otherwise</param>
/// <returns>Array of all files for the folder, ordered as previous cabinet, this cabinet, next cabinet</returns>
private CFFILE[] GetSpannedFiles(string? filename, int folderIndex, bool ignorePrev = false, bool skipPrev = false, bool skipNext = false)
{
    // Ignore invalid archives
    if (Files.IsNullOrEmpty())
        return [];

    // Get all files with a name and matching index
    var files = Array.FindAll(Files, f =>
    {
        if (string.IsNullOrEmpty(f.Name))
            return false;

        // Ignore links to previous cabinets, if required
        if (ignorePrev
            && (f.FolderIndex == FolderIndex.CONTINUED_FROM_PREV
                || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT))
        {
            return false;
        }

        return GetFolderIndex(f) == folderIndex;
    });

    // Check if the folder spans in either direction
    bool spanPrev = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_FROM_PREV || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT);
    bool spanNext = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_TO_NEXT || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT);

    // If the folder spans backward and Prev is not being skipped
    CFFILE[] prevFiles = [];
    if (!skipPrev && spanPrev)
    {
        // Try to get Prev if it doesn't exist
        if (Prev?.Header == null)
            Prev = OpenPrevious(filename);

        // Get all files from the last folder of Prev, without walking forward again
        if (Prev?.Header != null && Prev.Folders != null)
        {
            int prevFolderIndex = Prev.FolderCount - 1;
            prevFiles = Prev.GetSpannedFiles(filename, prevFolderIndex, skipNext: true);
        }
    }

    // If the folder spans forward and Next is not being skipped
    CFFILE[] nextFiles = [];
    if (!skipNext && spanNext)
    {
        // Try to get Next if it doesn't exist
        if (Next?.Header == null)
            Next = OpenNext(filename);

        // Get all files from the first folder of Next, without walking backward again
        if (Next?.Header != null && Next.Folders != null && Next.Folders.Length > 0)
            nextFiles = Next.GetSpannedFiles(filename, 0, skipPrev: true);
    }

    // Return all found files in order
    return [.. prevFiles, .. files, .. nextFiles];
}
363466

364467
/// <summary>
365468
/// Get all files for the current folder index

0 commit comments

Comments
 (0)