1111#include < future>
1212#include < iomanip>
1313#include < chrono>
14+ #include < Windows.h>
1415
1516#include " ../include/PyInstArchive.h"
1617#include " ../include/zlib.h"
@@ -61,6 +62,22 @@ void PyInstArchive::close() {
6162 }
6263}
6364
65+ /* *
66+ * @brief Retrieves the list of Table of Contents (TOC) entries from the PyInstaller archive.
67+ *
68+ * This method returns a constant reference to the vector containing the TOC entries.
69+ * The TOC entries represent individual files within the PyInstaller archive, including their
70+ * positions, compressed sizes, uncompressed sizes, compression flags, data types, and names.
71+ *
72+ * @return A constant reference to a vector of CTOCEntry objects representing the TOC entries.
73+ *
74+ * @note The vector returned by this method is read-only, ensuring the TOC entries cannot be modified
75+ * directly through the returned reference. To modify the TOC entries, use appropriate member functions.
76+ */
77+ const std::vector<CTOCEntry>& PyInstArchive::getTOCList () const {
78+ return tocList;
79+ }
80+
6481/* *
6582 * @brief Checks if the file is a valid PyInstaller archive.
6683 *
@@ -147,6 +164,59 @@ uint32_t swapBytes(uint32_t value) {
147164 ((value << 24 ) & 0xFF000000 );
148165}
149166
167+ /* *
168+ * @brief Retrieves the number of physical CPU cores on the system.
169+ *
170+ * This function uses the Windows API to obtain information about the system's logical processors
171+ * and their relationship to physical CPU cores. It first determines the required buffer size for
172+ * the processor information, allocates the buffer, and then retrieves the information.
173+ *
174+ * The function iterates through the retrieved data to count the number of physical cores and
175+ * returns this count. If an error occurs at any stage, the function outputs an error message and
176+ * returns a default value of 1.
177+ *
178+ * @return The number of physical CPU cores on the system. If an error occurs, returns 1.
179+ *
180+ * @note This function is platform-specific and intended for use on Windows systems.
181+ * @note The function uses `malloc` for buffer allocation and `free` for deallocation.
182+ */
183+ size_t getPhysicalCoreCount () {
184+ DWORD length = 0 ;
185+ // Initial call to get buffer size
186+ GetLogicalProcessorInformation (nullptr , &length);
187+ if (GetLastError () != ERROR_INSUFFICIENT_BUFFER) {
188+ std::cerr << " [!] Error: Unable to determine buffer size for processor information.\n " ;
189+ return 1 ; // Default to 1 if unable to determine
190+ }
191+
192+ // Allocate buffer for processor information
193+ SYSTEM_LOGICAL_PROCESSOR_INFORMATION* buffer = reinterpret_cast <SYSTEM_LOGICAL_PROCESSOR_INFORMATION*>(malloc (length));
194+ if (buffer == nullptr ) {
195+ std::cerr << " [!] Error: Memory allocation failed.\n " ;
196+ return 1 ;
197+ }
198+
199+ // Retrieve processor information
200+ if (!GetLogicalProcessorInformation (buffer, &length)) {
201+ std::cerr << " [!] Error: Unable to get logical processor information.\n " ;
202+ free (buffer);
203+ return 1 ;
204+ }
205+
206+ DWORD processorCoreCount = 0 ;
207+ DWORD count = length / sizeof (SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
208+
209+ // Count the number of physical cores
210+ for (DWORD i = 0 ; i < count; ++i) {
211+ if (buffer[i].Relationship == RelationProcessorCore) {
212+ processorCoreCount++;
213+ }
214+ }
215+
216+ free (buffer);
217+ return static_cast <size_t >(processorCoreCount);
218+ }
219+
150220/* *
151221 * @brief Extracts and parses CArchive information from the PyInstaller file.
152222 *
@@ -308,9 +378,13 @@ void PyInstArchive::displayInfo() {
308378 * @param outputDir The directory where the extracted files will be saved.
309379 */
310380void PyInstArchive::timeExtractionProcess (const std::string& outputDir) {
381+ // Determine the number of physical cores to use as threads
382+ size_t numThreads = getPhysicalCoreCount ();
383+
311384 auto start = std::chrono::high_resolution_clock::now ();
312385
313- MultiThreaedFileExtract (tocList, outputDir);
386+ // Call MultiThreadedFileExtract with the required arguments
387+ MultiThreadedFileExtract (tocList, outputDir, numThreads);
314388
315389 auto end = std::chrono::high_resolution_clock::now ();
316390 std::chrono::duration<double > elapsed = end - start;
@@ -334,12 +408,30 @@ void PyInstArchive::timeExtractionProcess(const std::string& outputDir) {
334408 * @note The mutexes `mtx` and `printMtx` are used within the tasks to ensure thread-safe operations.
335409 * @note The ThreadPool destructor ensures all tasks are completed before the program continues.
336410 */
337- void PyInstArchive::MultiThreaedFileExtract (const std::vector<CTOCEntry>& tocEntries, const std::string& outputDir) {
338- size_t numThreads = std::thread::hardware_concurrency ();
339- if (numThreads == 0 ) numThreads = 4 ; // Fallback if hardware_concurrency can't determine
411+ void PyInstArchive::MultiThreadedFileExtract (const std::vector<CTOCEntry>& tocEntries, const std::string& outputDir, size_t numThreads) {
412+ size_t maxCores = getPhysicalCoreCount (); // Function to get number of physical cores
413+
414+ // Validate user-specified number of threads
415+ if (numThreads == 0 ) {
416+ numThreads = maxCores;
417+ std::cout << " [*] Using all available physical cores: " << numThreads << " \n " ;
418+ }
419+ else {
420+ if (numThreads > maxCores) {
421+ std::cout << " [!] Specified number of cores (" << numThreads << " ) exceeds available physical cores (" << maxCores << " ). Using maximum available cores.\n " ;
422+ numThreads = maxCores;
423+ }
424+ else {
425+ std::cout << " [*] Using user-specified number of cores: " << numThreads << " \n " ;
426+ }
427+ }
428+
429+ if (numThreads == 0 ) numThreads = 1 ; // Ensure at least one thread
340430
431+ // Initialize ThreadPool with the specified number of threads
341432 ThreadPool pool (numThreads);
342433
434+ // Enqueue tasks
343435 for (const auto & tocEntry : tocEntries) {
344436 pool.enqueue ([this , &tocEntry, &outputDir] {
345437 this ->decompressAndExtractFile (tocEntry, outputDir, mtx, printMtx);
@@ -429,29 +521,81 @@ void PyInstArchive::decompressAndExtractFile(const CTOCEntry& tocEntry, const st
429521}
430522
431523/* *
432- * @brief Parses command-line arguments for interacting with a PyInstaller archive.
524+ * @brief Parses command-line arguments and initiates the archive processing .
433525 *
434- * This method processes the command-line arguments, checks if the required parameters
435- * are provided, and then opens the specified PyInstaller archive. It can either display
436- * information about the archive or extract its files to the specified output directory.
526+ * This function handles the parsing of command-line arguments to determine the appropriate
527+ * operation to perform on the PyInstaller archive. It supports specifying the number of cores
528+ * to use for extraction, the command to execute (either to display information or to extract files),
529+ * the path to the archive, and the optional output directory.
530+ *
531+ * Supported arguments:
532+ * - `-cores N`: Specifies the number of cores to use for the extraction process. If not provided or set to 0, all available physical cores are used.
533+ * - `-i`: Command to display information about the archive (filenames, sizes).
534+ * - `-u`: Command to extract files from the archive.
535+ * - `<archive_path>`: The path to the PyInstaller archive file.
536+ * - `[output_dir]`: Optional output directory where the extracted files will be saved. Defaults to "unpacked".
537+ *
538+ * Example usage:
539+ * - `unpack.exe -cores 4 -u archive_file.exe output_dir`
540+ * - `unpack.exe -i archive_file.exe`
437541 *
438542 * @param argc The number of command-line arguments.
439543 * @param argv The array of command-line arguments.
440- *
441- * @note The command must be either "-i" to display archive information or "-u" to extract files.
442- * The archive path is required, and an optional output directory can be specified.
443- * @note If the output directory does not exist, it will be created automatically.
444- * @note Errors are logged if any arguments are invalid or if the archive cannot be processed.
445544 */
446545void parseArgs (int argc, char * argv[]) {
546+ // Default values
547+ int numCores = 0 ; // 0 indicates 'use all available physical cores'
548+ std::string command;
549+ std::string archivePath;
550+ std::string outputDir = " unpacked" ; // Default output directory
551+ int argIndex = 1 ;
552+
553+ // Check if there are enough arguments
447554 if (argc < 3 ) {
448- std::cerr << " [!] Usage: " << argv[0 ] << " [-i | -u] <archive_path> [output_dir]" << std::endl;
555+ std::cerr << " [!] Usage: " << argv[0 ] << " [-cores N] [- i | -u] <archive_path> [output_dir]" << std::endl;
449556 exit (1 );
450557 }
451558
452- std::string command = argv[1 ]; // Command (-i or -u)
453- std::string archivePath = argv[2 ]; // Archive file path
454- std::string outputDir = (argc > 3 ) ? argv[3 ] : " unpacked" ; // Output directory (default to "output")
559+ // Parse arguments
560+ while (argIndex < argc) {
561+ std::string arg = argv[argIndex];
562+
563+ if (arg == " -cores" ) {
564+ // Handle the -cores argument
565+ argIndex++;
566+ if (argIndex >= argc) {
567+ std::cerr << " [!] Error: Expected number after -cores" << std::endl;
568+ exit (1 );
569+ }
570+ numCores = atoi (argv[argIndex]);
571+ if (numCores <= 0 ) {
572+ std::cerr << " [!] Invalid number of cores specified. Using all available physical cores." << std::endl;
573+ numCores = 0 ;
574+ }
575+ argIndex++;
576+ }
577+ else if (arg == " -i" || arg == " -u" ) {
578+ // Handle the command (-i or -u)
579+ command = arg;
580+ argIndex++;
581+ }
582+ else if (archivePath.empty ()) {
583+ // First argument that's not an option is the archive path
584+ archivePath = arg;
585+ argIndex++;
586+ }
587+ else {
588+ // Optional output directory
589+ outputDir = arg;
590+ argIndex++;
591+ }
592+ }
593+
594+ // Validate required arguments
595+ if (command.empty () || archivePath.empty ()) {
596+ std::cerr << " [!] Usage: " << argv[0 ] << " [-cores N] [-i | -u] <archive_path> [output_dir]" << std::endl;
597+ exit (1 );
598+ }
455599
456600 // Check if the output directory exists, create it if it doesn't
457601 if (!std::filesystem::exists (outputDir)) {
@@ -479,7 +623,8 @@ void parseArgs(int argc, char* argv[]) {
479623 archive.displayInfo (); // Display information about the archive (filenames, sizes)
480624 }
481625 else if (command == " -u" ) {
482- archive.timeExtractionProcess (outputDir); // Extract files to the specified directory
626+ archive.parseTOC (); // Parse the Table of Contents before extraction
627+ archive.MultiThreadedFileExtract (archive.getTOCList (), outputDir, static_cast <size_t >(numCores));
483628 }
484629 else {
485630 std::cerr << " [!] Unknown command: " << command << std::endl;
0 commit comments