Skip to content

Commit b89a9b0

Browse files
committed
update
update readability
1 parent a2b505e commit b89a9b0

File tree

2 files changed

+103
-48
lines changed

2 files changed

+103
-48
lines changed

include/PyInstArchive.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ class PyInstArchive {
6767
void debugEntrySizes();
6868

6969
void parseTOC();
70+
bool readEntrySize(uint32_t& entrySize);
71+
void readEntryFields(uint32_t& entryPos, uint32_t& cmprsdDataSize, uint32_t& uncmprsdDataSize, uint8_t& cmprsFlag, char& typeCmprsData, std::vector<char>& nameBuffer, uint32_t entrySize);
72+
std::string decodeEntryName(std::vector<char>& nameBuffer, uint32_t parsedLen);
73+
void addTOCEntry(uint32_t entryPos, uint32_t cmprsdDataSize, uint32_t uncmprsdDataSize, uint8_t cmprsFlag, char typeCmprsData, const std::string& name);
74+
uint32_t sizeofEntry() const;
75+
7076
void timeExtractionProcess(const std::string& outputDir);
7177
void displayInfo();
7278
void MultiThreadedFileExtract(const std::vector<CTOCEntry>& tocEntries, const std::string& outputDir, size_t numThreads);

src/Pyinstaller.cpp

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -362,73 +362,122 @@ void PyInstArchive::debugEntrySizes() {
362362
#endif
363363

364364
/**
365-
* @brief Parses the Table of Contents (TOC) from the PyInstaller archive.
365+
* @brief Parse the Table of Contents (TOC) of the CArchive.
366366
*
367-
* This function reads the TOC from the archive, which contains information about the
368-
* embedded files, such as their size, position in the archive, compression status, and type.
369-
* Each entry is stored in a list for further processing.
367+
* This function reads the TOC from the PyInstaller archive and extracts
368+
* the necessary metadata for each entry, storing them in a list.
370369
*/
371370
void PyInstArchive::parseTOC() {
372-
// Set the file pointer to the position of the Table of Contents
373371
fPtr.seekg(tableOfContentsPos, std::ios::beg);
372+
tocList.clear();
373+
uint32_t parsedLen = 0;
374374

375-
tocList.clear(); // Clear any existing TOC entries
376-
uint32_t parsedLen = 0; // Initialize parsed length
377-
378-
// Read the Table of Contents in chunks
379375
while (parsedLen < tableOfContentsSize) {
380376
uint32_t entrySize;
381-
fPtr.read(reinterpret_cast<char*>(&entrySize), sizeof(entrySize));
382-
if (fPtr.gcount() < sizeof(entrySize)) break; // Prevent reading beyond the file
383-
384-
entrySize = swapBytes(entrySize); // Convert entry size to host byte order
385-
386-
uint32_t nameLen = sizeof(uint32_t) + sizeof(uint32_t) * 3 + sizeof(uint8_t) + sizeof(char);
387-
std::vector<char> nameBuffer(entrySize - nameLen); // Create buffer for the name
377+
if (!readEntrySize(entrySize)) break;
388378

389-
// Read the rest of the fields in one go
379+
std::vector<char> nameBuffer(entrySize - sizeofEntry());
390380
uint32_t entryPos, cmprsdDataSize, uncmprsdDataSize;
391381
uint8_t cmprsFlag;
392382
char typeCmprsData;
393383

394-
fPtr.read(reinterpret_cast<char*>(&entryPos), sizeof(entryPos));
395-
fPtr.read(reinterpret_cast<char*>(&cmprsdDataSize), sizeof(cmprsdDataSize));
396-
fPtr.read(reinterpret_cast<char*>(&uncmprsdDataSize), sizeof(uncmprsdDataSize));
397-
fPtr.read(reinterpret_cast<char*>(&cmprsFlag), sizeof(cmprsFlag));
398-
fPtr.read(reinterpret_cast<char*>(&typeCmprsData), sizeof(typeCmprsData));
384+
readEntryFields(entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, nameBuffer, entrySize);
399385

400-
// swap bytes if needed (endian-aware file format)
401-
entryPos = swapBytes(entryPos);
402-
cmprsdDataSize = swapBytes(cmprsdDataSize);
403-
uncmprsdDataSize = swapBytes(uncmprsdDataSize);
386+
std::string name = decodeEntryName(nameBuffer, parsedLen);
387+
addTOCEntry(entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name);
404388

405-
fPtr.read(nameBuffer.data(), entrySize - nameLen);
389+
parsedLen += entrySize;
390+
}
406391

407-
// Decode the name from the buffer and remove null characters
408-
std::string name(nameBuffer.data(), nameBuffer.size());
409-
name.erase(std::remove(name.begin(), name.end(), '\0'), name.end());
392+
std::cout << "[+] Found " << tocList.size() << " files in CArchive" << std::endl;
393+
}
410394

411-
// Handle invalid names and normalize
412-
if (name.empty() || name[0] == '/') {
413-
name = "unnamed_" + std::to_string(parsedLen);
414-
}
395+
/**
396+
* @brief Read the size of the next TOC entry.
397+
*
398+
* @param entrySize Reference to store the size of the TOC entry.
399+
* @return True if the entry size was successfully read, otherwise false.
400+
*/
401+
bool PyInstArchive::readEntrySize(uint32_t& entrySize) {
402+
fPtr.read(reinterpret_cast<char*>(&entrySize), sizeof(entrySize));
403+
if (fPtr.gcount() < sizeof(entrySize)) return false;
415404

416-
// Add the entry to the TOC list
417-
tocList.emplace_back(
418-
overlayPos + entryPos,
419-
cmprsdDataSize,
420-
uncmprsdDataSize,
421-
cmprsFlag,
422-
typeCmprsData,
423-
name
424-
);
425-
426-
// Update the parsed length by the size of the current entry
427-
parsedLen += entrySize;
405+
entrySize = swapBytes(entrySize);
406+
return true;
407+
}
408+
409+
/**
410+
* @brief Read the fields of a TOC entry.
411+
*
412+
* @param entryPos Reference to store the entry position.
413+
* @param cmprsdDataSize Reference to store the compressed data size.
414+
* @param uncmprsdDataSize Reference to store the uncompressed data size.
415+
* @param cmprsFlag Reference to store the compression flag.
416+
* @param typeCmprsData Reference to store the type of compressed data.
417+
* @param nameBuffer Buffer to store the entry name.
418+
* @param entrySize The size of the entry.
419+
*/
420+
void PyInstArchive::readEntryFields(uint32_t& entryPos, uint32_t& cmprsdDataSize, uint32_t& uncmprsdDataSize, uint8_t& cmprsFlag, char& typeCmprsData, std::vector<char>& nameBuffer, uint32_t entrySize) {
421+
uint32_t nameLen = sizeofEntry();
422+
fPtr.read(reinterpret_cast<char*>(&entryPos), sizeof(entryPos));
423+
fPtr.read(reinterpret_cast<char*>(&cmprsdDataSize), sizeof(cmprsdDataSize));
424+
fPtr.read(reinterpret_cast<char*>(&uncmprsdDataSize), sizeof(uncmprsdDataSize));
425+
fPtr.read(reinterpret_cast<char*>(&cmprsFlag), sizeof(cmprsFlag));
426+
fPtr.read(reinterpret_cast<char*>(&typeCmprsData), sizeof(typeCmprsData));
427+
428+
entryPos = swapBytes(entryPos);
429+
cmprsdDataSize = swapBytes(cmprsdDataSize);
430+
uncmprsdDataSize = swapBytes(uncmprsdDataSize);
431+
432+
fPtr.read(nameBuffer.data(), entrySize - nameLen);
433+
}
434+
435+
/**
436+
* @brief Decode the entry name from the buffer and handle invalid names.
437+
*
438+
* @param nameBuffer Buffer containing the entry name.
439+
* @param parsedLen The current parsed length of the TOC.
440+
* @return The decoded and normalized entry name.
441+
*/
442+
std::string PyInstArchive::decodeEntryName(std::vector<char>& nameBuffer, uint32_t parsedLen) {
443+
std::string name(nameBuffer.data(), nameBuffer.size());
444+
name.erase(std::remove(name.begin(), name.end(), '\0'), name.end());
445+
446+
if (name.empty() || name[0] == '/') {
447+
name = "unnamed_" + std::to_string(parsedLen);
428448
}
429449

430-
// Output the total number of entries found in the TOC
431-
std::cout << "[+] Found " << tocList.size() << " files in CArchive" << std::endl;
450+
return name;
451+
}
452+
453+
/**
454+
* @brief Add a TOC entry to the list.
455+
*
456+
* @param entryPos The position of the entry.
457+
* @param cmprsdDataSize The compressed data size of the entry.
458+
* @param uncmprsdDataSize The uncompressed data size of the entry.
459+
* @param cmprsFlag The compression flag of the entry.
460+
* @param typeCmprsData The type of compressed data.
461+
* @param name The name of the entry.
462+
*/
463+
void PyInstArchive::addTOCEntry(uint32_t entryPos, uint32_t cmprsdDataSize, uint32_t uncmprsdDataSize, uint8_t cmprsFlag, char typeCmprsData, const std::string& name) {
464+
tocList.emplace_back(
465+
overlayPos + entryPos,
466+
cmprsdDataSize,
467+
uncmprsdDataSize,
468+
cmprsFlag,
469+
typeCmprsData,
470+
name
471+
);
472+
}
473+
474+
/**
475+
* @brief Get the size of the standard TOC entry fields.
476+
*
477+
* @return The size of the standard TOC entry fields.
478+
*/
479+
uint32_t PyInstArchive::sizeofEntry() const {
480+
return sizeof(uint32_t) + sizeof(uint32_t) * 3 + sizeof(uint8_t) + sizeof(char);
432481
}
433482

434483
/**

0 commit comments

Comments
 (0)