|
9 | 9 | #include <queue> |
10 | 10 | #include <condition_variable> |
11 | 11 | #include <future> |
| 12 | +#include <iomanip> |
| 13 | +#include <chrono> |
12 | 14 |
|
13 | 15 | #include "../include/PyInstArchive.h" |
14 | 16 | #include "../include/zlib.h" |
15 | 17 |
|
| 18 | + |
| 19 | + |
16 | 20 | /** |
17 | 21 | * @brief The magic string used to identify PyInstaller archives. |
18 | 22 | * |
@@ -304,89 +308,123 @@ void PyInstArchive::displayInfo() { |
304 | 308 | * @param outputDir The directory where the extracted files will be saved. |
305 | 309 | */ |
306 | 310 | void PyInstArchive::timeExtractionProcess(const std::string& outputDir) { |
307 | | - auto start = std::chrono::steady_clock::now(); |
| 311 | + auto start = std::chrono::high_resolution_clock::now(); |
308 | 312 |
|
309 | | - std::vector<std::future<void>> futures; |
310 | | - for (const auto& tocEntry : tocList) { |
311 | | - futures.emplace_back(std::async(std::launch::async, &PyInstArchive::decompressAndExtractFile, this, std::ref(tocEntry), std::ref(outputDir))); |
312 | | - } |
| 313 | + MultiThreaedFileExtract(tocList, outputDir); |
313 | 314 |
|
314 | | - for (auto& future : futures) { |
315 | | - future.get(); |
316 | | - } |
| 315 | + auto end = std::chrono::high_resolution_clock::now(); |
| 316 | + std::chrono::duration<double> elapsed = end - start; |
317 | 317 |
|
318 | | - auto end = std::chrono::steady_clock::now(); |
319 | | - std::chrono::duration<double> elapsedSeconds = end - start; |
320 | | - int minutes = static_cast<int>(elapsedSeconds.count()) / 60; |
321 | | - double seconds = elapsedSeconds.count() - (minutes * 60); |
322 | | - std::cout << "Time: " << std::setfill('0') << minutes << ":" |
323 | | - << std::fixed << std::setprecision(2) << std::setw(5) << seconds << std::endl; |
| 318 | + std::cout << "[*] Extraction completed in " << elapsed.count() << " seconds.\n"; |
324 | 319 | } |
325 | 320 |
|
326 | 321 | /** |
327 | | - * @brief Decompresses and extracts a file from the PyInstaller archive to the specified output directory. |
| 322 | + * @brief Decompresses and extracts all files from the PyInstaller archive using multithreading. |
| 323 | + * |
| 324 | + * This method initializes a thread pool and enqueues tasks to decompress |
| 325 | + * and extract each file specified in the Table of Contents (TOC) entries. It leverages multithreading |
| 326 | + * to improve extraction performance by processing multiple files concurrently. |
328 | 327 | * |
329 | | - * This function reads the compressed data of the file from the archive, decompresses it if necessary, |
330 | | - * and writes the resulting data to a file in the specified output directory. The file extraction process |
331 | | - * is thread-safe, utilizing mutexes to ensure proper synchronization of file reading and console output. |
| 328 | + * @param tocEntries A vector of TOC entries representing the files to extract from the archive. |
| 329 | + * @param outputDir The directory where the extracted files will be saved. |
332 | 330 | * |
333 | | - * @param tocEntry The Table of Contents (TOC) entry that contains metadata about the file to be extracted. |
| 331 | + * @note The function creates a thread pool with a number of threads equal to the hardware concurrency. |
| 332 | + * If the hardware concurrency cannot be determined, it defaults to 4 threads. |
| 333 | + * @note Each TOC entry is processed by a separate task that calls `decompressAndExtractFile`. |
| 334 | + * @note The mutexes `mtx` and `printMtx` are used within the tasks to ensure thread-safe operations. |
| 335 | + * @note Relies on the ThreadPool destructor to finish all queued tasks before this function returns; the tasks capture `tocEntry` and `outputDir` by reference. |
| 336 | + */ |
| 337 | +void PyInstArchive::MultiThreaedFileExtract(const std::vector<CTOCEntry>& tocEntries, const std::string& outputDir) { |
| 338 | + size_t numThreads = std::thread::hardware_concurrency(); |
| 339 | + if (numThreads == 0) numThreads = 4; // Fallback if hardware_concurrency can't determine |
| 340 | + |
| 341 | + ThreadPool pool(numThreads); |
| 342 | + |
| 343 | + for (const auto& tocEntry : tocEntries) { |
| 344 | + pool.enqueue([this, &tocEntry, &outputDir] { |
| 345 | + this->decompressAndExtractFile(tocEntry, outputDir, mtx, printMtx); |
| 346 | + }); |
| 347 | + } |
| 348 | +} |
| 349 | + |
| 350 | +/** |
| 351 | + * @brief Decompresses and extracts a single file from the PyInstaller archive. |
| 352 | + * |
| 353 | + * This method handles the decompression and extraction of a single file specified by the |
| 354 | + * Table of Contents (TOC) entry. It reads the compressed data from the archive file, |
| 355 | + * decompresses it if necessary, and writes the output to the specified directory, |
| 356 | + * preserving the file structure. Thread safety is ensured through mutex locks for file |
| 357 | + * access and console output, allowing concurrent execution in a multithreaded environment. |
| 358 | + * |
| 359 | + * @param tocEntry The Table of Contents entry representing the file to extract. |
334 | 360 | * @param outputDir The directory where the extracted file will be saved. |
| 361 | + * @param mtx Mutex to synchronize access to the file stream `fPtr` for reading. |
| 362 | + * @param printMtx Mutex to synchronize console output to prevent message interleaving. |
| 363 | + * |
| 364 | + * @note The function checks if the data is compressed and handles decompression using zlib. |
| 365 | + * @note Failures of zlib initialization, decompression, or opening the output file are logged to the console; the archive read and the output write are not checked for errors. |
| 366 | + * @note The function assumes that the output directory exists or can be created. |
| 367 | + * @note This method is designed to be thread-safe and can be called concurrently by multiple threads. |
335 | 368 | */ |
336 | | -void PyInstArchive::decompressAndExtractFile(const CTOCEntry& tocEntry, const std::string& outputDir) { |
| 369 | +void PyInstArchive::decompressAndExtractFile(const CTOCEntry& tocEntry, const std::string& outputDir, std::mutex& mtx, std::mutex& printMtx) { |
337 | 370 | std::vector<char> compressedData; |
| 371 | + |
| 372 | + // Read Compressed Data with File Lock |
338 | 373 | { |
339 | 374 | std::lock_guard<std::mutex> lock(mtx); |
340 | 375 | fPtr.seekg(tocEntry.position, std::ios::beg); |
341 | 376 | compressedData.resize(tocEntry.getCompressedDataSize()); |
342 | 377 | fPtr.read(compressedData.data(), tocEntry.getCompressedDataSize()); |
343 | 378 | } |
344 | 379 |
|
345 | | - // Decompress data |
| 380 | + // Decompress Data |
346 | 381 | std::vector<char> decompressedData; |
347 | 382 | if (tocEntry.isCompressed()) { |
348 | 383 | decompressedData.resize(tocEntry.uncmprsdDataSize); |
349 | 384 |
|
350 | 385 | z_stream strm = {}; |
351 | | - strm.avail_in = tocEntry.getCompressedDataSize(); |
| 386 | + strm.avail_in = static_cast<uInt>(tocEntry.getCompressedDataSize()); |
352 | 387 | strm.next_in = reinterpret_cast<Bytef*>(compressedData.data()); |
353 | | - strm.avail_out = tocEntry.uncmprsdDataSize; |
| 388 | + strm.avail_out = static_cast<uInt>(tocEntry.uncmprsdDataSize); |
354 | 389 | strm.next_out = reinterpret_cast<Bytef*>(decompressedData.data()); |
355 | 390 |
|
356 | 391 | if (inflateInit(&strm) != Z_OK) { |
357 | | - std::cerr << "[!] Error: Could not initialize zlib for decompression" << std::endl; |
| 392 | + std::lock_guard<std::mutex> lock(printMtx); |
| 393 | + std::cerr << "[!] Error: Could not initialize zlib for decompression\n"; |
358 | 394 | return; |
359 | 395 | } |
360 | 396 |
|
361 | 397 | int result = inflate(&strm, Z_FINISH); |
362 | 398 | inflateEnd(&strm); |
363 | 399 |
|
364 | 400 | if (result != Z_STREAM_END) { |
365 | | - std::cerr << "[!] Error: Decompression failed for " << tocEntry.getName() << std::endl; |
| 401 | + std::lock_guard<std::mutex> lock(printMtx); |
| 402 | + std::cerr << "[!] Error: Decompression failed for " << tocEntry.getName() << "\n"; |
366 | 403 | return; |
367 | 404 | } |
368 | 405 | } |
369 | 406 | else { |
370 | | - decompressedData = compressedData; |
| 407 | + decompressedData = std::move(compressedData); |
371 | 408 | } |
372 | 409 |
|
373 | | - // Extract file |
| 410 | + // Extract File |
374 | 411 | std::filesystem::path outputFilePath = std::filesystem::path(outputDir) / tocEntry.getName(); |
375 | 412 | std::filesystem::create_directories(outputFilePath.parent_path()); |
376 | 413 |
|
377 | | - std::ofstream outFile(outputFilePath, std::ios::binary); |
378 | | - if (!outFile.is_open()) { |
379 | | - std::cerr << "[!] Error: Could not open output file " << outputFilePath << std::endl; |
380 | | - return; |
| 414 | + { |
| 415 | + std::ofstream outFile(outputFilePath, std::ios::binary); |
| 416 | + if (!outFile.is_open()) { |
| 417 | + std::lock_guard<std::mutex> lock(printMtx); |
| 418 | + std::cerr << "[!] Error: Could not open output file " << outputFilePath << "\n"; |
| 419 | + return; |
| 420 | + } |
| 421 | + outFile.write(decompressedData.data(), decompressedData.size()); |
381 | 422 | } |
382 | 423 |
|
383 | | - outFile.write(decompressedData.data(), decompressedData.size()); |
384 | | - outFile.close(); |
385 | | - |
386 | | - // Synchronize print statements |
| 424 | + // Log Extraction Success |
387 | 425 | { |
388 | | - std::lock_guard<std::mutex> printLock(printMtx); |
389 | | - std::cout << "[+] Extracted: " << tocEntry.getName() << " (" << decompressedData.size() << " bytes)" << std::endl; |
| 426 | + std::lock_guard<std::mutex> lock(printMtx); |
| 427 | + std::cout << "[+] Extracted: " << tocEntry.getName() << " (" << decompressedData.size() << " bytes)\n"; |
390 | 428 | } |
391 | 429 | } |
392 | 430 |
|
|
0 commit comments