Skip to content

Commit a2b505e

Browse files
committed
update
make the code more readable
1 parent 7259e7f commit a2b505e

File tree

2 files changed

+152
-64
lines changed

2 files changed

+152
-64
lines changed

include/PyInstArchive.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,18 @@ class PyInstArchive {
5454
// Member functions
5555
bool open();
5656
void close();
57+
5758
bool checkFile();
59+
bool isFileValid(size_t searchChunkSize);
60+
bool findCookie(size_t searchChunkSize);
61+
void determinePyinstallerVersion();
62+
5863
bool getCArchiveInfo();
64+
void readArchiveData(uint32_t& lengthofPackage, uint32_t& toc, uint32_t& tocLen, uint32_t& pyver);
65+
void calculateOverlayInfo(uint32_t lengthofPackage, uint32_t toc, uint32_t tocLen);
66+
void debugOutput(uint32_t lengthofPackage);
67+
void debugEntrySizes();
68+
5969
void parseTOC();
6070
void timeExtractionProcess(const std::string& outputDir);
6171
void displayInfo();

src/Pyinstaller.cpp

Lines changed: 142 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -79,29 +79,62 @@ const std::vector<CTOCEntry>& PyInstArchive::getTOCList() const {
7979
}
8080

8181
/**
82-
* @brief Checks if the file is a valid PyInstaller archive.
82+
* @brief Check if the file is a valid PyInstaller archive and determine its version.
8383
*
84-
* This method searches for the magic string (a unique identifier) in the PyInstaller archive
85-
* and determines the version of PyInstaller used. If the magic string is found, it sets the
86-
* cookie position and identifies the PyInstaller version.
84+
* This function processes the file specified by filePath, verifies if it's a valid PyInstaller
85+
* archive by searching for the magic string, and determines the PyInstaller version used to create the archive.
8786
*
88-
* @return true if the file is a valid PyInstaller archive, false otherwise.
87+
* @return True if the file is valid and the version is determined, otherwise false.
8988
*/
9089
bool PyInstArchive::checkFile() {
9190
std::cout << "[+] Processing " << filePath << std::endl;
9291
const size_t searchChunkSize = 8192;
93-
uint64_t endPos = fileSize;
94-
cookiePos = -1;
9592

96-
if (endPos < MAGIC.size()) {
93+
if (!isFileValid(searchChunkSize)) {
94+
return false;
95+
}
96+
97+
if (!findCookie(searchChunkSize)) {
98+
return false;
99+
}
100+
101+
determinePyinstallerVersion();
102+
return true;
103+
}
104+
105+
/**
106+
* @brief Validate the file size to ensure it's large enough to contain the magic string.
107+
*
108+
* This function checks if the file size is smaller than the size of the magic string,
109+
* which would indicate that the file is too short or truncated to be a valid PyInstaller archive.
110+
*
111+
* @param searchChunkSize The size of the chunk to be searched.
112+
* @return True if the file size is valid, otherwise false.
113+
*/
114+
bool PyInstArchive::isFileValid(size_t searchChunkSize) {
115+
if (fileSize < MAGIC.size()) {
97116
std::cerr << "[!] Error: File is too short or truncated" << std::endl;
98117
return false;
99118
}
119+
return true;
120+
}
100121

122+
/**
123+
* @brief Search for the magic string in the file to find the cookie position.
124+
*
125+
* This function reads through the file in chunks, searching for the magic string that indicates
126+
* the start of the PyInstaller archive's metadata. It sets the cookie position if the magic string is found.
127+
*
128+
* @param searchChunkSize The size of the chunk to be searched.
129+
* @return True if the cookie position is found, otherwise false.
130+
*/
131+
bool PyInstArchive::findCookie(size_t searchChunkSize) {
132+
uint64_t endPos = fileSize;
133+
cookiePos = -1;
101134
std::vector<char> buffer(searchChunkSize + MAGIC.size() - 1);
102135

103136
while (endPos >= MAGIC.size()) {
104-
uint64_t startPos = endPos >= searchChunkSize ? endPos - searchChunkSize : 0;
137+
uint64_t startPos = (endPos >= searchChunkSize) ? endPos - searchChunkSize : 0;
105138
size_t chunkSize = endPos - startPos;
106139

107140
fPtr.seekg(startPos, std::ios::beg);
@@ -114,40 +147,43 @@ bool PyInstArchive::checkFile() {
114147
for (size_t i = chunkSize; i-- > 0;) {
115148
if (std::memcmp(buffer.data() + i, MAGIC.c_str(), MAGIC.size()) == 0) {
116149
cookiePos = startPos + i;
117-
break;
150+
return true;
118151
}
119152
}
120153

121-
if (cookiePos != -1) {
122-
break;
123-
}
124-
125154
endPos = startPos + MAGIC.size() - 1;
126155
if (startPos == 0) {
127156
break;
128157
}
129158
}
130159

131-
if (cookiePos == -1) {
132-
std::cerr << "[!] Error: Missing cookie, unsupported pyinstaller version or not a pyinstaller archive" << std::endl;
133-
return false;
134-
}
160+
std::cerr << "[!] Error: Missing cookie, unsupported pyinstaller version or not a pyinstaller archive" << std::endl;
161+
return false;
162+
}
135163

164+
/**
165+
* @brief Determine the version of the PyInstaller used to create the archive.
166+
*
167+
* This function reads a specific section of the file and checks for the presence of the word "python".
168+
* If found, it sets the PyInstaller version to 2.1 or higher; otherwise, it sets the version to 2.0.
169+
*/
170+
void PyInstArchive::determinePyinstallerVersion() {
136171
fPtr.seekg(cookiePos + PYINST20_COOKIE_SIZE, std::ios::beg);
137172
std::vector<char> buffer64(64);
138173
fPtr.read(buffer64.data(), 64);
139-
if (std::string(buffer64.data(), 64).find("python") != std::string::npos) {
174+
std::string bufferStr(buffer64.data(), buffer64.size());
175+
176+
if (bufferStr.find("python") != std::string::npos) {
140177
std::cout << "[+] Pyinstaller version: 2.1+" << std::endl;
141178
pyinstVer = 21;
142179
}
143180
else {
144-
pyinstVer = 20;
145181
std::cout << "[+] Pyinstaller version: 2.0" << std::endl;
182+
pyinstVer = 20;
146183
}
147-
148-
return true;
149184
}
150185

186+
151187
/**
152188
* @brief Swaps the byte order of a 32-bit integer to correct endianness.
153189
*
@@ -218,61 +254,27 @@ size_t getPhysicalCoreCount() {
218254
}
219255

220256
/**
221-
* @brief Extracts and parses CArchive information from the PyInstaller file.
257+
* @brief Get information about the CArchive.
222258
*
223-
* This function reads the package length, table of contents (TOC), and Python version
224-
* from the PyInstaller archive. It adjusts byte order for multi-byte values based on
225-
* the endianness and calculates offsets for further extraction.
259+
* This function reads the PyInstaller archive to extract metadata, such as the Python version,
260+
* table of contents position, and sizes, and overlays size and position.
226261
*
227-
* @return true if the archive information was successfully parsed, false if an error occurred.
262+
* @return True if the information is successfully extracted, otherwise false.
228263
*/
229264
bool PyInstArchive::getCArchiveInfo() {
230265
try {
231266
uint32_t lengthofPackage, toc, tocLen, pyver;
232-
233-
// Check for version and load relevant data
234-
fPtr.seekg(cookiePos, std::ios::beg);
235-
char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions
236-
fPtr.read(buffer, (pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
237-
238-
// Directly read values from the buffer
239-
if (pyinstVer == 20 || pyinstVer == 21) {
240-
// Read and immediately swap bytes
241-
lengthofPackage = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 8));
242-
toc = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 12));
243-
tocLen = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 16));
244-
pyver = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 20));
245-
}
246-
247-
// Python version determination
248-
pymaj = pyver / (pyver >= 100 ? 100 : 10);
249-
pymin = pyver % (pyver >= 100 ? 100 : 10);
250-
251-
std::cout << "[+] Python version: " << static_cast<int>(pymaj) << "." << static_cast<int>(pymin) << std::endl;
252-
253-
uint64_t tailBytes = fileSize - cookiePos - ((pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
254-
overlaySize = static_cast<uint64_t>(lengthofPackage) + tailBytes;
255-
overlayPos = fileSize - overlaySize;
256-
tableOfContentsPos = overlayPos + toc;
257-
tableOfContentsSize = tocLen;
267+
readArchiveData(lengthofPackage, toc, tocLen, pyver);
268+
calculateOverlayInfo(lengthofPackage, toc, tocLen);
258269

259270
#ifdef _DEBUG
260-
std::cout << "[+] Length of package: " << lengthofPackage << " bytes" << std::endl;
261-
std::cout << "[DEBUG] overlaySize: " << overlaySize << std::endl;
262-
std::cout << "[DEBUG] overlayPos: " << overlayPos << std::endl;
263-
std::cout << "[DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
264-
std::cout << "[DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
271+
debugOutput(lengthofPackage);
265272
#endif
266273

267274
parseTOC();
268275

269276
#ifdef _DEBUG
270-
std::cout << "[DEBUG] Entry sizes in the CArchive:" << std::endl;
271-
for (const auto& entry : tocList) {
272-
std::cout << "[DEBUG] Entry Name: " << entry.getName()
273-
<< ", Compressed Size: " << entry.getCompressedDataSize() << " bytes"
274-
<< std::endl;
275-
}
277+
debugEntrySizes();
276278
#endif
277279

278280
}
@@ -283,6 +285,82 @@ bool PyInstArchive::getCArchiveInfo() {
283285
return true;
284286
}
285287

288+
/**
289+
* @brief Read the archive data and extract necessary values.
290+
*
291+
* This function reads the PyInstaller archive's cookie section to extract metadata,
292+
* such as the length of the package, table of contents position and length, and Python version.
293+
*
294+
* @param lengthofPackage Reference to store the length of the package.
295+
* @param toc Reference to store the position of the table of contents.
296+
* @param tocLen Reference to store the length of the table of contents.
297+
* @param pyver Reference to store the Python version.
298+
*/
299+
void PyInstArchive::readArchiveData(uint32_t& lengthofPackage, uint32_t& toc, uint32_t& tocLen, uint32_t& pyver) {
300+
fPtr.seekg(cookiePos, std::ios::beg);
301+
char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions
302+
fPtr.read(buffer, (pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
303+
304+
if (pyinstVer == 20 || pyinstVer == 21) {
305+
lengthofPackage = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 8));
306+
toc = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 12));
307+
tocLen = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 16));
308+
pyver = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 20));
309+
}
310+
}
311+
312+
313+
/**
314+
* @brief Calculate the overlay size and position, and table of contents position and size.
315+
*
316+
* This function calculates the overlay size and position, and the table of contents position and size
317+
* based on the extracted archive metadata.
318+
*
319+
* @param lengthofPackage The length of the package extracted from the archive.
320+
* @param toc The position of the table of contents extracted from the archive.
321+
* @param tocLen The length of the table of contents extracted from the archive.
322+
*/
323+
void PyInstArchive::calculateOverlayInfo(uint32_t lengthofPackage, uint32_t toc, uint32_t tocLen) {
324+
uint64_t tailBytes = fileSize - cookiePos - ((pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
325+
overlaySize = static_cast<uint64_t>(lengthofPackage) + tailBytes;
326+
overlayPos = fileSize - overlaySize;
327+
tableOfContentsPos = overlayPos + toc;
328+
tableOfContentsSize = tocLen;
329+
}
330+
331+
#ifdef _DEBUG
332+
/**
333+
* @brief Output debug information about the archive.
334+
*
335+
* This function outputs debug information about the overlay size and position,
336+
* and the table of contents position and size.
337+
*
338+
* @param lengthofPackage The length of the package extracted from the archive.
339+
*/
340+
void PyInstArchive::debugOutput(uint32_t lengthofPackage) {
341+
std::cout << "[+] Length of package: " << lengthofPackage << " bytes" << std::endl;
342+
std::cout << "[DEBUG] overlaySize: " << overlaySize << std::endl;
343+
std::cout << "[DEBUG] overlayPos: " << overlayPos << std::endl;
344+
std::cout << "[DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
345+
std::cout << "[DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
346+
}
347+
348+
/**
349+
* @brief Output debug information about the entry sizes in the CArchive.
350+
*
351+
* This function outputs debug information about the entry sizes in the CArchive, including the
352+
* name and compressed data size of each entry.
353+
*/
354+
void PyInstArchive::debugEntrySizes() {
355+
std::cout << "[DEBUG] Entry sizes in the CArchive:" << std::endl;
356+
for (const auto& entry : tocList) {
357+
std::cout << "[DEBUG] Entry Name: " << entry.getName()
358+
<< ", Compressed Size: " << entry.getCompressedDataSize() << " bytes"
359+
<< std::endl;
360+
}
361+
}
362+
#endif
363+
286364
/**
287365
* @brief Parses the Table of Contents (TOC) from the PyInstaller archive.
288366
*

0 commit comments

Comments
 (0)