@@ -80,21 +80,30 @@ bool PyInstArchive::checkFile() {
8080 return false ;
8181 }
8282
83- while (true ) {
83+ std::vector<char > buffer (searchChunkSize + MAGIC.size () - 1 );
84+
85+ while (endPos >= MAGIC.size ()) {
8486 uint64_t startPos = endPos >= searchChunkSize ? endPos - searchChunkSize : 0 ;
8587 size_t chunkSize = endPos - startPos;
86- if (chunkSize < MAGIC.size ()) {
87- break ;
88- }
88+
8989 fPtr .seekg (startPos, std::ios::beg);
90- std::vector<char > data (chunkSize);
91- fPtr .read (data.data (), chunkSize);
90+ fPtr .read (buffer.data (), chunkSize);
91+
92+ for (size_t i = chunkSize; i < buffer.size (); ++i) {
93+ buffer[i] = buffer[i - chunkSize];
94+ }
95+
96+ for (size_t i = chunkSize; i-- > 0 ;) {
97+ if (std::memcmp (buffer.data () + i, MAGIC.c_str (), MAGIC.size ()) == 0 ) {
98+ cookiePos = startPos + i;
99+ break ;
100+ }
101+ }
92102
93- auto offs = std::string (data.data (), chunkSize).rfind (MAGIC);
94- if (offs != std::string::npos) {
95- cookiePos = startPos + offs;
103+ if (cookiePos != -1 ) {
96104 break ;
97105 }
106+
98107 endPos = startPos + MAGIC.size () - 1 ;
99108 if (startPos == 0 ) {
100109 break ;
@@ -107,9 +116,9 @@ bool PyInstArchive::checkFile() {
107116 }
108117
109118 fPtr .seekg (cookiePos + PYINST20_COOKIE_SIZE, std::ios::beg);
110- std::vector<char > buffer (64 );
111- fPtr .read (buffer .data (), 64 );
112- if (std::string (buffer .data (), 64 ).find (" python" ) != std::string::npos) {
119+ std::vector<char > buffer64 (64 );
120+ fPtr .read (buffer64 .data (), 64 );
121+ if (std::string (buffer64 .data (), 64 ).find (" python" ) != std::string::npos) {
113122 std::cout << " [+] Pyinstaller version: 2.1+" << std::endl;
114123 pyinstVer = 21 ;
115124 }
@@ -121,6 +130,7 @@ bool PyInstArchive::checkFile() {
121130 return true ;
122131}
123132
133+
124134/* *
125135 * @brief Swaps the byte order of a 32-bit integer to correct endianness.
126136 *
@@ -150,62 +160,51 @@ bool PyInstArchive::getCArchiveInfo() {
150160 try {
151161 uint32_t lengthofPackage, toc, tocLen, pyver;
152162
153- if (pyinstVer == 20 ) {
154- fPtr .seekg (cookiePos, std::ios::beg);
155- char buffer[PYINST20_COOKIE_SIZE];
156- fPtr .read (buffer, PYINST20_COOKIE_SIZE);
157- std::memcpy (&lengthofPackage, buffer + 8 , 4 );
158- std::memcpy (&toc, buffer + 12 , 4 );
159- std::memcpy (&tocLen, buffer + 16 , 4 );
160- std::memcpy (&pyver, buffer + 20 , 4 );
163+ // Check for version and load relevant data
164+ fPtr .seekg (cookiePos, std::ios::beg);
165+ char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions if possible
166+ fPtr .read (buffer, (pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
167+
168+ // Directly read values from the buffer
169+ if (pyinstVer == 20 || pyinstVer == 21 ) {
170+ // Read and immediately swap bytes (combine reading and byte order correction in one step)
171+ lengthofPackage = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 8 ));
172+ toc = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 12 ));
173+ tocLen = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 16 ));
174+ pyver = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 20 ));
161175 }
162- else if (pyinstVer == 21 ) {
163- fPtr .seekg (cookiePos, std::ios::beg);
164- char buffer[PYINST21_COOKIE_SIZE];
165- fPtr .read (buffer, PYINST21_COOKIE_SIZE);
166- std::memcpy (&lengthofPackage, buffer + 8 , 4 );
167- std::memcpy (&toc, buffer + 12 , 4 );
168- std::memcpy (&tocLen, buffer + 16 , 4 );
169- std::memcpy (&pyver, buffer + 20 , 4 );
170- }
171-
172- // Convert values to host byte order (correcting endianness)
173- lengthofPackage = swapBytes (lengthofPackage);
174- toc = swapBytes (toc);
175- tocLen = swapBytes (tocLen);
176- pyver = swapBytes (pyver);
177176
178- if (pyver >= 100 ) {
179- pymaj = pyver / 100 ;
180- pymin = pyver % 100 ;
181- }
182- else {
183- pymaj = pyver / 10 ;
184- pymin = pyver % 10 ;
185- }
177+ // Python version determination
178+ pymaj = pyver / (pyver >= 100 ? 100 : 10 );
179+ pymin = pyver % (pyver >= 100 ? 100 : 10 );
186180
187181 std::cout << " [+] Python version: " << static_cast <int >(pymaj) << " ." << static_cast <int >(pymin) << std::endl;
188182
189- uint64_t tailBytes = fileSize - cookiePos - (pyinstVer == 20 ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
183+ uint64_t tailBytes = fileSize - cookiePos - (( pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
190184 overlaySize = static_cast <uint64_t >(lengthofPackage) + tailBytes;
191185 overlayPos = fileSize - overlaySize;
192186 tableOfContentsPos = overlayPos + toc;
193187 tableOfContentsSize = tocLen;
194188
189+ #ifdef _DEBUG
195190 std::cout << " [+] Length of package: " << lengthofPackage << " bytes" << std::endl;
196191 std::cout << " [DEBUG] overlaySize: " << overlaySize << std::endl;
197192 std::cout << " [DEBUG] overlayPos: " << overlayPos << std::endl;
198193 std::cout << " [DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
199194 std::cout << " [DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
195+ #endif
200196
201- parseTOC ();
197+ parseTOC (); // Always included, regardless of the mode
202198
203- std::cout << " [INFO] Entry sizes in the CArchive:" << std::endl;
199+ #ifdef _DEBUG
200+ std::cout << " [DEBUG] Entry sizes in the CArchive:" << std::endl;
204201 for (const auto & entry : tocList) {
205- std::cout << " [INFO ] Entry Name: " << entry.getName ()
202+ std::cout << " [DEBUG ] Entry Name: " << entry.getName ()
206203 << " , Compressed Size: " << entry.getCompressedDataSize () << " bytes"
207204 << std::endl;
208205 }
206+ #endif
207+
209208 }
210209 catch (...) {
211210 std::cerr << " [!] Error: The file is not a PyInstaller archive" << std::endl;
@@ -214,6 +213,7 @@ bool PyInstArchive::getCArchiveInfo() {
214213 return true ;
215214}
216215
216+
217217/* *
218218 * @brief Parses the Table of Contents (TOC) from the PyInstaller archive.
219219 *
@@ -222,64 +222,55 @@ bool PyInstArchive::getCArchiveInfo() {
222222 * Each entry is stored in a list for further processing.
223223 */
224224void PyInstArchive::parseTOC () {
225-
226225 // Set the file pointer to the position of the Table of Contents
227226 fPtr .seekg (tableOfContentsPos, std::ios::beg);
228227
229228 tocList.clear (); // Clear any existing TOC entries
230229 uint32_t parsedLen = 0 ; // Initialize parsed length
231230
232- // Continue parsing until the total size of the TOC is reached
231+ // Parse one TOC entry per iteration while parsedLen is below the TOC size
233232 while (parsedLen < tableOfContentsSize) {
234233 uint32_t entrySize;
235- fPtr .read (reinterpret_cast <char *>(&entrySize), sizeof (entrySize)); // Read the entry size
236- entrySize = swapBytes (entrySize); // Convert entry size to host byte order
234+ fPtr .read (reinterpret_cast <char *>(&entrySize), sizeof (entrySize));
235+ if ( fPtr . gcount () < sizeof (entrySize)) break ; // Prevent reading beyond the file
237236
238- // Debugging output for entry size
239- std::cout << " [DEBUG] Entry Size: " << entrySize << " , Parsed Length: " << parsedLen << std::endl;
237+ entrySize = swapBytes (entrySize); // Convert entry size to host byte order
240238
241- // Calculate the length of the name and allocate buffer
242239 uint32_t nameLen = sizeof (uint32_t ) + sizeof (uint32_t ) * 3 + sizeof (uint8_t ) + sizeof (char );
243240 std::vector<char > nameBuffer (entrySize - nameLen); // Create buffer for the name
244241
245- // Variables to hold entry information
242+ // Fixed-size fields of the entry; each one is read with its own fPtr.read call below
246243 uint32_t entryPos, cmprsdDataSize, uncmprsdDataSize;
247244 uint8_t cmprsFlag;
248245 char typeCmprsData;
249246
250- // Read the other fields from the file
251247 fPtr .read (reinterpret_cast <char *>(&entryPos), sizeof (entryPos));
252248 fPtr .read (reinterpret_cast <char *>(&cmprsdDataSize), sizeof (cmprsdDataSize));
253249 fPtr .read (reinterpret_cast <char *>(&uncmprsdDataSize), sizeof (uncmprsdDataSize));
254250 fPtr .read (reinterpret_cast <char *>(&cmprsFlag), sizeof (cmprsFlag));
255251 fPtr .read (reinterpret_cast <char *>(&typeCmprsData), sizeof (typeCmprsData));
256- fPtr .read (nameBuffer.data (), entrySize - nameLen);
257252
258- // Debugging output for each field read
259- std::cout << " [DEBUG] Entry Position: " << swapBytes (entryPos) << std::endl ;
260- std::cout << " [DEBUG] Compressed Data Size: " << swapBytes (cmprsdDataSize) << std::endl ;
261- std::cout << " [DEBUG] Uncompressed Data Size: " << swapBytes (uncmprsdDataSize) << std::endl ;
262- std::cout << " [DEBUG] Compression Flag: " << static_cast < int >(cmprsFlag) << std::endl;
263- std::cout << " [DEBUG] Type of Compressed Data: " << typeCmprsData << std::endl ;
253+ // swap bytes if needed (endian-aware file format)
254+ entryPos = swapBytes (entryPos);
255+ cmprsdDataSize = swapBytes (cmprsdDataSize);
256+ uncmprsdDataSize = swapBytes (uncmprsdDataSize);
257+
258+ fPtr . read (nameBuffer. data (), entrySize - nameLen) ;
264259
265260 // Decode the name from the buffer and remove null characters
266261 std::string name (nameBuffer.data (), nameBuffer.size ());
267262 name.erase (std::remove (name.begin (), name.end (), ' \0 ' ), name.end ());
268263
269- // Debugging output for the name
270- std::cout << " [DEBUG] Name: '" << name << " '" << std::endl;
271-
272264 // Handle invalid names and normalize
273265 if (name.empty () || name[0 ] == ' /' ) {
274266 name = " unnamed_" + std::to_string (parsedLen);
275- std::cout << " [DEBUG] Normalized Name: '" << name << " '" << std::endl; // Debugging normalized name
276267 }
277268
278269 // Add the entry to the TOC list
279270 tocList.emplace_back (
280- overlayPos + swapBytes ( entryPos) ,
281- swapBytes ( cmprsdDataSize) ,
282- swapBytes ( uncmprsdDataSize) ,
271+ overlayPos + entryPos,
272+ cmprsdDataSize,
273+ uncmprsdDataSize,
283274 cmprsFlag,
284275 typeCmprsData,
285276 name
@@ -291,9 +282,9 @@ void PyInstArchive::parseTOC() {
291282
292283 // Output the total number of entries found in the TOC
293284 std::cout << " [+] Found " << tocList.size () << " files in CArchive" << std::endl;
294-
295285}
296286
287+
297288/* *
298289 * @brief Displays the list of files in the PyInstaller archive.
299290 *
@@ -338,7 +329,6 @@ void PyInstArchive::timeExtractionProcess(const std::string& outputDir) {
338329 << std::fixed << std::setprecision (2 ) << std::setw (5 ) << seconds << std::endl;
339330}
340331
341-
342332/* *
343333 * @brief Decompresses and extracts a file from the PyInstaller archive to the specified output directory.
344334 *
@@ -406,27 +396,6 @@ void PyInstArchive::decompressAndExtractFile(const CTOCEntry& tocEntry, const st
406396 }
407397}
408398
409- /* *
410- * @brief Decompresses data using zlib.
411- *
412- * Decompresses `compressedData` into `decompressedData` using zlib.
413- * Ensure `decompressedData` has enough space for the decompressed output.
414- *
415- * @param compressedData Input vector of compressed data.
416- * @param decompressedData Output vector for decompressed data.
417- *
418- * @note Prints an error message if decompression fails.
419- */
420- void PyInstArchive::decompressData (const std::vector<char >& compressedData, std::vector<char >& decompressedData) {
421- uLongf decompressedSize = decompressedData.size ();
422- int result = uncompress (reinterpret_cast <Bytef*>(decompressedData.data ()), &decompressedSize,
423- reinterpret_cast <const Bytef*>(compressedData.data ()), compressedData.size ());
424-
425- if (result != Z_OK) {
426- std::cerr << " [!] Error: Decompression failed" << std::endl;
427- // Optionally, you could also throw an exception or handle the error more specifically
428- }
429- }
430399
431400/* *
432401 * @brief Parses command-line arguments for interacting with a PyInstaller archive.
0 commit comments