Skip to content

Commit e84964a

Browse files
committed
Add MS-CAB extract and decompression
1 parent 0394ea5 commit e84964a

File tree

1 file changed

+171
-55
lines changed

1 file changed

+171
-55
lines changed

SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs

Lines changed: 171 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.IO;
3+
using SabreTools.IO.Extensions;
34
using SabreTools.Models.MicrosoftCabinet;
45

56
namespace SabreTools.Serialization.Wrappers
@@ -111,80 +112,99 @@ public MicrosoftCabinet(Cabinet? model, Stream? data)
111112
#region Cabinet Set
112113

113114
/// <summary>
/// Extract a cabinet set to an output directory, if possible
/// </summary>
/// <param name="filename">Filename for one cabinet in the set</param>
/// <param name="outDir">Path to the output directory</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <returns>
/// False if the archive has no folders or an exception escapes the folder loop;
/// true otherwise. Failures on individual files are caught, optionally logged,
/// and do not change the result.
/// </returns>
public bool ExtractAll(string filename, string outDir, bool includeDebug)
{
    // TODO: Seek to the first archive and make sure all parts are extracted

    // If the archive is invalid
    if (Folders == null || Folders.Length == 0)
        return false;

    try
    {
        // Resolve the output root once so every entry can be checked against it.
        // An empty output directory means "relative to the current directory".
        string outRoot = Path.GetFullPath(string.IsNullOrEmpty(outDir) ? "." : outDir);
        string outRootPrefix = outRoot.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal)
            ? outRoot
            : outRoot + Path.DirectorySeparatorChar;

        // Loop through the folders
        for (int f = 0; f < Folders.Length; f++)
        {
            // Get the current folder for processing
            var folder = Folders[f];

            // Decompress the blocks, if possible
            using var blockStream = DecompressBlocks(filename, folder, f);
            if (blockStream == null || blockStream.Length == 0)
                continue;

            // Ensure files
            var files = GetFiles(f);
            if (files.Length == 0)
                continue;

            // Loop through the files
            foreach (var compressedFile in files)
            {
                try
                {
                    // Each file is a slice of the folder's decompressed data
                    blockStream.Seek(compressedFile.FolderStartOffset, SeekOrigin.Begin);
                    byte[] fileData = blockStream.ReadBytes((int)compressedFile.FileSize);

                    // Ensure directory separators are consistent
                    string fileName = compressedFile.Name!;
                    if (Path.DirectorySeparatorChar == '\\')
                        fileName = fileName.Replace('/', '\\');
                    else if (Path.DirectorySeparatorChar == '/')
                        fileName = fileName.Replace('\\', '/');

                    // Entry names come from the archive and are untrusted; refuse
                    // any name that resolves outside of the output directory
                    // (e.g. via ".." segments or a rooted path)
                    string tempFile = Path.GetFullPath(Path.Combine(outRoot, fileName));
                    if (!tempFile.StartsWith(outRootPrefix, StringComparison.Ordinal))
                    {
                        if (includeDebug) Console.WriteLine($"Skipping {fileName}: path resolves outside of the output directory");
                        continue;
                    }

                    var directoryName = Path.GetDirectoryName(tempFile);
                    if (directoryName != null && !Directory.Exists(directoryName))
                        Directory.CreateDirectory(directoryName);

                    // WriteAllBytes replaces any existing file; File.OpenWrite
                    // would leave stale trailing bytes from a longer old file
                    File.WriteAllBytes(tempFile, fileData);
                }
                catch (Exception ex)
                {
                    // Best-effort extraction: one bad file does not stop the rest
                    if (includeDebug) Console.WriteLine(ex);
                }
            }
        }

        return true;
    }
    catch (Exception ex)
    {
        if (includeDebug) Console.WriteLine(ex);
        return false;
    }
}
167187

168188
/// <summary>
169189
/// Open the next archive, if possible
170190
/// </summary>
171191
/// <param name="filename">Filename for one cabinet in the set</param>
172-
public MicrosoftCabinet? OpenNext(string file)
192+
private MicrosoftCabinet? OpenNext(string filename)
173193
{
174194
// Ignore invalid archives
175195
if (Header == null)
176196
return null;
177197

178198
// Normalize the filename
179-
file = Path.GetFullPath(file);
199+
filename = Path.GetFullPath(filename);
180200

181201
// Get if the cabinet has a next part
182202
string? next = Header.CabinetNext;
183203
if (string.IsNullOrEmpty(next))
184204
return null;
185205

186206
// Get the full next path
187-
string? folder = Path.GetDirectoryName(file);
207+
string? folder = Path.GetDirectoryName(filename);
188208
if (folder != null)
189209
next = Path.Combine(folder, next);
190210

@@ -197,22 +217,22 @@ public MicrosoftCabinet(Cabinet? model, Stream? data)
197217
/// Open the previous archive, if possible
198218
/// </summary>
199219
/// <param name="filename">Filename for one cabinet in the set</param>
200-
public MicrosoftCabinet? OpenPrevious(string file)
220+
private MicrosoftCabinet? OpenPrevious(string filename)
201221
{
202222
// Ignore invalid archives
203223
if (Header == null)
204224
return null;
205225

206226
// Normalize the filename
207-
file = Path.GetFullPath(file);
227+
filename = Path.GetFullPath(filename);
208228

209229
// Get if the cabinet has a previous part
210230
string? prev = Header.CabinetPrev;
211231
if (string.IsNullOrEmpty(prev))
212232
return null;
213233

214234
// Get the full previous path
215-
string? folder = Path.GetDirectoryName(file);
235+
string? folder = Path.GetDirectoryName(filename);
216236
if (folder != null)
217237
prev = Path.Combine(folder, prev);
218238

@@ -302,7 +322,9 @@ private static uint S(byte[] a, int b, int x)
302322
/// <summary>
303323
/// Get the corrected folder index
304324
/// </summary>
305-
public int GetFolderIndex(CFFILE file)
325+
/// <param name="file">File to get the corrected index for</param>
326+
/// <returns>Corrected folder index for the current archive</returns>
327+
private int GetFolderIndex(CFFILE file)
306328
{
307329
return file.FolderIndex switch
308330
{
@@ -317,10 +339,102 @@ public int GetFolderIndex(CFFILE file)
317339

318340
#region Folders
319341

342+
/// <summary>
/// Decompress all blocks for a folder
/// </summary>
/// <param name="filename">Filename for one cabinet in the set</param>
/// <param name="folder">Folder containing the blocks to decompress</param>
/// <param name="folderIndex">Index of the folder in the cabinet</param>
/// <returns>
/// Stream holding the decompressed data, or null if the folder is null or has no
/// data blocks. The stream is returned positioned after the last byte written,
/// so callers must seek before reading. Quantum and LZX folders are not yet
/// supported and produce an empty stream.
/// </returns>
public Stream? DecompressBlocks(string filename, CFFOLDER? folder, int folderIndex)
{
    // Ensure data blocks
    var dataBlocks = GetDataBlocks(filename, folder, folderIndex);
    if (dataBlocks == null || dataBlocks.Length == 0)
        return null;

    // Explicit guard instead of relying on the null-forgiving operator below
    if (folder == null)
        return null;

    // Setup decompressors; the MS-ZIP decompressor is shared across all blocks
    var mszip = IO.Compression.MSZIP.Decompressor.Create();

    // The compression type is a property of the folder, so resolve it once
    var compressionType = GetCompressionType(folder);

    // Loop through the data blocks
    var ms = new MemoryStream();
    for (int i = 0; i < dataBlocks.Length; i++)
    {
        var db = dataBlocks[i];
        if (db?.CompressedData == null)
            continue;

        switch (compressionType)
        {
            // Uncompressed data
            case CompressionType.TYPE_NONE:
                ms.Write(db.CompressedData, 0, db.CompressedData.Length);
                ms.Flush();
                break;

            // MS-ZIP
            case CompressionType.TYPE_MSZIP:
            {
                long position = ms.Position;
                mszip.CopyTo(db.CompressedData, ms);
                long decompressedSize = ms.Position - position;

                // Pad to the correct size but throw a warning about this;
                // later file offsets assume every block has its declared size
                if (decompressedSize < db.UncompressedSize)
                {
                    Console.Error.WriteLine($"Data block {i} in folder {folderIndex} had mismatching sizes. Expected: {db.UncompressedSize}, Got: {decompressedSize}");
                    byte[] padding = new byte[db.UncompressedSize - decompressedSize];
                    ms.Write(padding, 0, padding.Length);
                }

                break;
            }

            // Quantum
            case CompressionType.TYPE_QUANTUM:
                // TODO: Unsupported
                // NOTE(review): window bits were sketched as
                // (((ushort)folder.CompressionType >> 8) & 0x1f) -- confirm before use
                break;

            // LZX
            case CompressionType.TYPE_LZX:
                // TODO: Unsupported
                break;
        }
    }

    return ms;
}
408+
409+
/// <summary>
/// Resolve the base compression type of a folder, ignoring any bits outside
/// of <see cref="CompressionType.MASK_TYPE"/>
/// </summary>
/// <param name="folder">Folder whose compression type should be resolved</param>
/// <returns>
/// One of the four known base types when the masked value matches,
/// otherwise <see cref="ushort.MaxValue"/> cast to <see cref="CompressionType"/>
/// </returns>
private static CompressionType GetCompressionType(CFFOLDER folder)
{
    // Mask once, then map the result onto the known base types
    var maskedType = folder.CompressionType & CompressionType.MASK_TYPE;
    return maskedType switch
    {
        CompressionType.TYPE_NONE => CompressionType.TYPE_NONE,
        CompressionType.TYPE_MSZIP => CompressionType.TYPE_MSZIP,
        CompressionType.TYPE_QUANTUM => CompressionType.TYPE_QUANTUM,
        CompressionType.TYPE_LZX => CompressionType.TYPE_LZX,

        // Anything else is reported with the sentinel value
        _ => (CompressionType)ushort.MaxValue,
    };
}
427+
320428
/// <summary>
321429
/// Get the set of data blocks for a folder
322430
/// </summary>
323-
public CFDATA[]? GetDataBlocks(string file, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false)
431+
/// <param name="filename">Filename for one cabinet in the set</param>
432+
/// <param name="folder">Folder containing the blocks to decompress</param>
433+
/// <param name="folderIndex">Index of the folder in the cabinet</param>
434+
/// <param name="skipPrev">Indicates if previous cabinets should be ignored</param>
435+
/// <param name="skipNext">Indicates if next cabinets should be ignored</param>
436+
/// <returns>Array of data blocks on success, null otherwise</returns>
437+
private CFDATA[]? GetDataBlocks(string filename, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false)
324438
{
325439
// Skip invalid folders
326440
if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0)
@@ -335,24 +449,24 @@ public int GetFolderIndex(CFFILE file)
335449
CFDATA[] prevBlocks = [];
336450
if (!skipPrev && Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_FROM_PREV || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT))
337451
{
338-
var prev = OpenPrevious(file);
452+
var prev = OpenPrevious(filename);
339453
if (prev?.Model?.Header != null && prev.Model.Folders != null)
340454
{
341455
int prevFolderIndex = prev.Model.Header.FolderCount;
342456
var prevFolder = prev.Model.Folders[prevFolderIndex - 1];
343-
prevBlocks = prev.GetDataBlocks(file, prevFolder, prevFolderIndex, skipNext: true) ?? [];
457+
prevBlocks = prev.GetDataBlocks(filename, prevFolder, prevFolderIndex, skipNext: true) ?? [];
344458
}
345459
}
346460

347461
// Check if the folder spans forward
348462
CFDATA[] nextBlocks = [];
349463
if (!skipNext && Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_TO_NEXT || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT))
350464
{
351-
var next = OpenNext(file);
465+
var next = OpenNext(filename);
352466
if (next?.Model?.Header != null && next.Model.Folders != null)
353467
{
354468
var nextFolder = next.Model.Folders[0];
355-
nextBlocks = next.GetDataBlocks(file, nextFolder, 0, skipPrev: true) ?? [];
469+
nextBlocks = next.GetDataBlocks(filename, nextFolder, 0, skipPrev: true) ?? [];
356470
}
357471
}
358472

@@ -363,7 +477,9 @@ public int GetFolderIndex(CFFILE file)
363477
/// <summary>
364478
/// Get all files for the current folder index
365479
/// </summary>
366-
public CFFILE[] GetFiles(int folderIndex)
480+
/// <param name="folderIndex">Index of the folder in the cabinet</param>
481+
/// <returns>Array of all files for the folder</returns>
482+
private CFFILE[] GetFiles(int folderIndex)
367483
{
368484
// Ignore invalid archives
369485
if (Files == null)

0 commit comments

Comments
 (0)