@@ -362,73 +362,122 @@ void PyInstArchive::debugEntrySizes() {
362362#endif
363363
364364/* *
365- * @brief Parses the Table of Contents (TOC) from the PyInstaller archive .
365+ * @brief Parse the Table of Contents (TOC) of the CArchive .
366366 *
367- * This function reads the TOC from the archive, which contains information about the
368- * embedded files, such as their size, position in the archive, compression status, and type.
369- * Each entry is stored in a list for further processing.
367+ * This function reads the TOC from the PyInstaller archive and extracts
368+ * the necessary metadata for each entry, storing them in a list.
370369 */
371370void PyInstArchive::parseTOC () {
372- // Set the file pointer to the position of the Table of Contents
373371 fPtr .seekg (tableOfContentsPos, std::ios::beg);
372+ tocList.clear ();
373+ uint32_t parsedLen = 0 ;
374374
375- tocList.clear (); // Clear any existing TOC entries
376- uint32_t parsedLen = 0 ; // Initialize parsed length
377-
378- // Read the Table of Contents in chunks
379375 while (parsedLen < tableOfContentsSize) {
380376 uint32_t entrySize;
381- fPtr .read (reinterpret_cast <char *>(&entrySize), sizeof (entrySize));
382- if (fPtr .gcount () < sizeof (entrySize)) break ; // Prevent reading beyond the file
383-
384- entrySize = swapBytes (entrySize); // Convert entry size to host byte order
385-
386- uint32_t nameLen = sizeof (uint32_t ) + sizeof (uint32_t ) * 3 + sizeof (uint8_t ) + sizeof (char );
387- std::vector<char > nameBuffer (entrySize - nameLen); // Create buffer for the name
377+ if (!readEntrySize (entrySize)) break ;
388378
389- // Read the rest of the fields in one go
379+ std::vector< char > nameBuffer (entrySize - sizeofEntry ());
390380 uint32_t entryPos, cmprsdDataSize, uncmprsdDataSize;
391381 uint8_t cmprsFlag;
392382 char typeCmprsData;
393383
394- fPtr .read (reinterpret_cast <char *>(&entryPos), sizeof (entryPos));
395- fPtr .read (reinterpret_cast <char *>(&cmprsdDataSize), sizeof (cmprsdDataSize));
396- fPtr .read (reinterpret_cast <char *>(&uncmprsdDataSize), sizeof (uncmprsdDataSize));
397- fPtr .read (reinterpret_cast <char *>(&cmprsFlag), sizeof (cmprsFlag));
398- fPtr .read (reinterpret_cast <char *>(&typeCmprsData), sizeof (typeCmprsData));
384+ readEntryFields (entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, nameBuffer, entrySize);
399385
400- // swap bytes if needed (endian-aware file format)
401- entryPos = swapBytes (entryPos);
402- cmprsdDataSize = swapBytes (cmprsdDataSize);
403- uncmprsdDataSize = swapBytes (uncmprsdDataSize);
386+ std::string name = decodeEntryName (nameBuffer, parsedLen);
387+ addTOCEntry (entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name);
404388
405- fPtr .read (nameBuffer.data (), entrySize - nameLen);
389+ parsedLen += entrySize;
390+ }
406391
407- // Decode the name from the buffer and remove null characters
408- std::string name (nameBuffer.data (), nameBuffer.size ());
409- name.erase (std::remove (name.begin (), name.end (), ' \0 ' ), name.end ());
392+ std::cout << " [+] Found " << tocList.size () << " files in CArchive" << std::endl;
393+ }
410394
411- // Handle invalid names and normalize
412- if (name.empty () || name[0 ] == ' /' ) {
413- name = " unnamed_" + std::to_string (parsedLen);
414- }
395+ /* *
396+ * @brief Read the size of the next TOC entry.
397+ *
398+ * @param entrySize Reference to store the size of the TOC entry.
399+ * @return True if the entry size was successfully read, otherwise false.
400+ */
401+ bool PyInstArchive::readEntrySize (uint32_t & entrySize) {
402+ fPtr .read (reinterpret_cast <char *>(&entrySize), sizeof (entrySize));
403+ if (fPtr .gcount () < sizeof (entrySize)) return false ;
415404
416- // Add the entry to the TOC list
417- tocList.emplace_back (
418- overlayPos + entryPos,
419- cmprsdDataSize,
420- uncmprsdDataSize,
421- cmprsFlag,
422- typeCmprsData,
423- name
424- );
425-
426- // Update the parsed length by the size of the current entry
427- parsedLen += entrySize;
405+ entrySize = swapBytes (entrySize);
406+ return true ;
407+ }
408+
409+ /* *
410+ * @brief Read the fields of a TOC entry.
411+ *
412+ * @param entryPos Reference to store the entry position.
413+ * @param cmprsdDataSize Reference to store the compressed data size.
414+ * @param uncmprsdDataSize Reference to store the uncompressed data size.
415+ * @param cmprsFlag Reference to store the compression flag.
416+ * @param typeCmprsData Reference to store the type of compressed data.
417+ * @param nameBuffer Buffer to store the entry name.
418+ * @param entrySize The size of the entry.
419+ */
420+ void PyInstArchive::readEntryFields (uint32_t & entryPos, uint32_t & cmprsdDataSize, uint32_t & uncmprsdDataSize, uint8_t & cmprsFlag, char & typeCmprsData, std::vector<char >& nameBuffer, uint32_t entrySize) {
421+ uint32_t nameLen = sizeofEntry ();
422+ fPtr .read (reinterpret_cast <char *>(&entryPos), sizeof (entryPos));
423+ fPtr .read (reinterpret_cast <char *>(&cmprsdDataSize), sizeof (cmprsdDataSize));
424+ fPtr .read (reinterpret_cast <char *>(&uncmprsdDataSize), sizeof (uncmprsdDataSize));
425+ fPtr .read (reinterpret_cast <char *>(&cmprsFlag), sizeof (cmprsFlag));
426+ fPtr .read (reinterpret_cast <char *>(&typeCmprsData), sizeof (typeCmprsData));
427+
428+ entryPos = swapBytes (entryPos);
429+ cmprsdDataSize = swapBytes (cmprsdDataSize);
430+ uncmprsdDataSize = swapBytes (uncmprsdDataSize);
431+
432+ fPtr .read (nameBuffer.data (), entrySize - nameLen);
433+ }
434+
435+ /* *
436+ * @brief Decode the entry name from the buffer and handle invalid names.
437+ *
438+ * @param nameBuffer Buffer containing the entry name.
439+ * @param parsedLen The current parsed length of the TOC.
440+ * @return The decoded and normalized entry name.
441+ */
442+ std::string PyInstArchive::decodeEntryName (std::vector<char >& nameBuffer, uint32_t parsedLen) {
443+ std::string name (nameBuffer.data (), nameBuffer.size ());
444+ name.erase (std::remove (name.begin (), name.end (), ' \0 ' ), name.end ());
445+
446+ if (name.empty () || name[0 ] == ' /' ) {
447+ name = " unnamed_" + std::to_string (parsedLen);
428448 }
429449
430- // Output the total number of entries found in the TOC
431- std::cout << " [+] Found " << tocList.size () << " files in CArchive" << std::endl;
450+ return name;
451+ }
452+
453+ /* *
454+ * @brief Add a TOC entry to the list.
455+ *
456+ * @param entryPos The position of the entry.
457+ * @param cmprsdDataSize The compressed data size of the entry.
458+ * @param uncmprsdDataSize The uncompressed data size of the entry.
459+ * @param cmprsFlag The compression flag of the entry.
460+ * @param typeCmprsData The type of compressed data.
461+ * @param name The name of the entry.
462+ */
463+ void PyInstArchive::addTOCEntry (uint32_t entryPos, uint32_t cmprsdDataSize, uint32_t uncmprsdDataSize, uint8_t cmprsFlag, char typeCmprsData, const std::string& name) {
464+ tocList.emplace_back (
465+ overlayPos + entryPos,
466+ cmprsdDataSize,
467+ uncmprsdDataSize,
468+ cmprsFlag,
469+ typeCmprsData,
470+ name
471+ );
472+ }
473+
474+ /* *
475+ * @brief Get the size of the standard TOC entry fields.
476+ *
477+ * @return The size of the standard TOC entry fields.
478+ */
479+ uint32_t PyInstArchive::sizeofEntry () const {
480+ return sizeof (uint32_t ) + sizeof (uint32_t ) * 3 + sizeof (uint8_t ) + sizeof (char );
432481}
433482
434483/* *
0 commit comments