@@ -79,29 +79,62 @@ const std::vector<CTOCEntry>& PyInstArchive::getTOCList() const {
7979}
8080
8181/* *
82- * @brief Checks if the file is a valid PyInstaller archive.
82+ * @brief Check if the file is a valid PyInstaller archive and determine its version .
8383 *
84- * This method searches for the magic string (a unique identifier) in the PyInstaller archive
85- * and determines the version of PyInstaller used. If the magic string is found, it sets the
86- * cookie position and identifies the PyInstaller version.
84+ * This function processes the file specified by filePath, verifies if it's a valid PyInstaller
85+ * archive by searching for the magic string, and determines the PyInstaller version used to create the archive.
8786 *
88- * @return true if the file is a valid PyInstaller archive, false otherwise.
87+ * @return True if the file is valid and the version is determined, otherwise false .
8988 */
9089bool PyInstArchive::checkFile () {
9190 std::cout << " [+] Processing " << filePath << std::endl;
9291 const size_t searchChunkSize = 8192 ;
93- uint64_t endPos = fileSize;
94- cookiePos = -1 ;
9592
96- if (endPos < MAGIC.size ()) {
93+ if (!isFileValid (searchChunkSize)) {
94+ return false ;
95+ }
96+
97+ if (!findCookie (searchChunkSize)) {
98+ return false ;
99+ }
100+
101+ determinePyinstallerVersion ();
102+ return true ;
103+ }
104+
105+ /* *
106+ * @brief Validate the file size to ensure it's large enough to contain the magic string.
107+ *
108+ * This function checks if the file size is smaller than the size of the magic string,
109+ * which would indicate that the file is too short or truncated to be a valid PyInstaller archive.
110+ *
111+ * @param searchChunkSize The size of the chunk to be searched.
112+ * @return True if the file size is valid, otherwise false.
113+ */
114+ bool PyInstArchive::isFileValid (size_t searchChunkSize) {
115+ if (fileSize < MAGIC.size ()) {
97116 std::cerr << " [!] Error: File is too short or truncated" << std::endl;
98117 return false ;
99118 }
119+ return true ;
120+ }
100121
122+ /* *
123+ * @brief Search for the magic string in the file to find the cookie position.
124+ *
125+ * This function reads through the file in chunks, searching for the magic string that indicates
126+ * the start of the PyInstaller archive's metadata. It sets the cookie position if the magic string is found.
127+ *
128+ * @param searchChunkSize The size of the chunk to be searched.
129+ * @return True if the cookie position is found, otherwise false.
130+ */
131+ bool PyInstArchive::findCookie (size_t searchChunkSize) {
132+ uint64_t endPos = fileSize;
133+ cookiePos = -1 ;
101134 std::vector<char > buffer (searchChunkSize + MAGIC.size () - 1 );
102135
103136 while (endPos >= MAGIC.size ()) {
104- uint64_t startPos = endPos >= searchChunkSize ? endPos - searchChunkSize : 0 ;
137+ uint64_t startPos = ( endPos >= searchChunkSize) ? endPos - searchChunkSize : 0 ;
105138 size_t chunkSize = endPos - startPos;
106139
107140 fPtr .seekg (startPos, std::ios::beg);
@@ -114,40 +147,43 @@ bool PyInstArchive::checkFile() {
114147 for (size_t i = chunkSize; i-- > 0 ;) {
115148 if (std::memcmp (buffer.data () + i, MAGIC.c_str (), MAGIC.size ()) == 0 ) {
116149 cookiePos = startPos + i;
117- break ;
150+ return true ;
118151 }
119152 }
120153
121- if (cookiePos != -1 ) {
122- break ;
123- }
124-
125154 endPos = startPos + MAGIC.size () - 1 ;
126155 if (startPos == 0 ) {
127156 break ;
128157 }
129158 }
130159
131- if (cookiePos == -1 ) {
132- std::cerr << " [!] Error: Missing cookie, unsupported pyinstaller version or not a pyinstaller archive" << std::endl;
133- return false ;
134- }
160+ std::cerr << " [!] Error: Missing cookie, unsupported pyinstaller version or not a pyinstaller archive" << std::endl;
161+ return false ;
162+ }
135163
164+ /* *
165+ * @brief Determine the version of the PyInstaller used to create the archive.
166+ *
167+ * This function reads a specific section of the file and checks for the presence of the word "python".
168+ * If found, it sets the PyInstaller version to 2.1 or higher; otherwise, it sets the version to 2.0.
169+ */
170+ void PyInstArchive::determinePyinstallerVersion () {
136171 fPtr .seekg (cookiePos + PYINST20_COOKIE_SIZE, std::ios::beg);
137172 std::vector<char > buffer64 (64 );
138173 fPtr .read (buffer64.data (), 64 );
139- if (std::string (buffer64.data (), 64 ).find (" python" ) != std::string::npos) {
174+ std::string bufferStr (buffer64.data (), buffer64.size ());
175+
176+ if (bufferStr.find (" python" ) != std::string::npos) {
140177 std::cout << " [+] Pyinstaller version: 2.1+" << std::endl;
141178 pyinstVer = 21 ;
142179 }
143180 else {
144- pyinstVer = 20 ;
145181 std::cout << " [+] Pyinstaller version: 2.0" << std::endl;
182+ pyinstVer = 20 ;
146183 }
147-
148- return true ;
149184}
150185
186+
151187/* *
152188 * @brief Swaps the byte order of a 32-bit integer to correct endianness.
153189 *
@@ -218,61 +254,27 @@ size_t getPhysicalCoreCount() {
218254}
219255
220256/* *
221- * @brief Extracts and parses CArchive information from the PyInstaller file .
257+ * @brief Get information about the CArchive .
222258 *
223- * This function reads the package length, table of contents (TOC), and Python version
224- * from the PyInstaller archive. It adjusts byte order for multi-byte values based on
225- * the endianness and calculates offsets for further extraction.
259+ * This function reads the PyInstaller archive to extract metadata, such as the Python version,
260+ * table of contents position, and sizes, and overlays size and position.
226261 *
227- * @return true if the archive information was successfully parsed, false if an error occurred .
262+ * @return True if the information is successfully extracted, otherwise false .
228263 */
229264bool PyInstArchive::getCArchiveInfo () {
230265 try {
231266 uint32_t lengthofPackage, toc, tocLen, pyver;
232-
233- // Check for version and load relevant data
234- fPtr .seekg (cookiePos, std::ios::beg);
235- char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions
236- fPtr .read (buffer, (pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
237-
238- // Directly read values from the buffer
239- if (pyinstVer == 20 || pyinstVer == 21 ) {
240- // Read and immediately swap bytes
241- lengthofPackage = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 8 ));
242- toc = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 12 ));
243- tocLen = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 16 ));
244- pyver = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 20 ));
245- }
246-
247- // Python version determination
248- pymaj = pyver / (pyver >= 100 ? 100 : 10 );
249- pymin = pyver % (pyver >= 100 ? 100 : 10 );
250-
251- std::cout << " [+] Python version: " << static_cast <int >(pymaj) << " ." << static_cast <int >(pymin) << std::endl;
252-
253- uint64_t tailBytes = fileSize - cookiePos - ((pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
254- overlaySize = static_cast <uint64_t >(lengthofPackage) + tailBytes;
255- overlayPos = fileSize - overlaySize;
256- tableOfContentsPos = overlayPos + toc;
257- tableOfContentsSize = tocLen;
267+ readArchiveData (lengthofPackage, toc, tocLen, pyver);
268+ calculateOverlayInfo (lengthofPackage, toc, tocLen);
258269
259270#ifdef _DEBUG
260- std::cout << " [+] Length of package: " << lengthofPackage << " bytes" << std::endl;
261- std::cout << " [DEBUG] overlaySize: " << overlaySize << std::endl;
262- std::cout << " [DEBUG] overlayPos: " << overlayPos << std::endl;
263- std::cout << " [DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
264- std::cout << " [DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
271+ debugOutput (lengthofPackage);
265272#endif
266273
267274 parseTOC ();
268275
269276#ifdef _DEBUG
270- std::cout << " [DEBUG] Entry sizes in the CArchive:" << std::endl;
271- for (const auto & entry : tocList) {
272- std::cout << " [DEBUG] Entry Name: " << entry.getName ()
273- << " , Compressed Size: " << entry.getCompressedDataSize () << " bytes"
274- << std::endl;
275- }
277+ debugEntrySizes ();
276278#endif
277279
278280 }
@@ -283,6 +285,82 @@ bool PyInstArchive::getCArchiveInfo() {
283285 return true ;
284286}
285287
288+ /* *
289+ * @brief Read the archive data and extract necessary values.
290+ *
291+ * This function reads the PyInstaller archive's cookie section to extract metadata,
292+ * such as the length of the package, table of contents position and length, and Python version.
293+ *
294+ * @param lengthofPackage Reference to store the length of the package.
295+ * @param toc Reference to store the position of the table of contents.
296+ * @param tocLen Reference to store the length of the table of contents.
297+ * @param pyver Reference to store the Python version.
298+ */
299+ void PyInstArchive::readArchiveData (uint32_t & lengthofPackage, uint32_t & toc, uint32_t & tocLen, uint32_t & pyver) {
300+ fPtr .seekg (cookiePos, std::ios::beg);
301+ char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions
302+ fPtr .read (buffer, (pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
303+
304+ if (pyinstVer == 20 || pyinstVer == 21 ) {
305+ lengthofPackage = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 8 ));
306+ toc = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 12 ));
307+ tocLen = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 16 ));
308+ pyver = swapBytes (*reinterpret_cast <uint32_t *>(buffer + 20 ));
309+ }
310+ }
311+
312+
313+ /* *
314+ * @brief Calculate the overlay size and position, and table of contents position and size.
315+ *
316+ * This function calculates the overlay size and position, and the table of contents position and size
317+ * based on the extracted archive metadata.
318+ *
319+ * @param lengthofPackage The length of the package extracted from the archive.
320+ * @param toc The position of the table of contents extracted from the archive.
321+ * @param tocLen The length of the table of contents extracted from the archive.
322+ */
323+ void PyInstArchive::calculateOverlayInfo (uint32_t lengthofPackage, uint32_t toc, uint32_t tocLen) {
324+ uint64_t tailBytes = fileSize - cookiePos - ((pyinstVer == 20 ) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
325+ overlaySize = static_cast <uint64_t >(lengthofPackage) + tailBytes;
326+ overlayPos = fileSize - overlaySize;
327+ tableOfContentsPos = overlayPos + toc;
328+ tableOfContentsSize = tocLen;
329+ }
330+
331+ #ifdef _DEBUG
332+ /* *
333+ * @brief Output debug information about the archive.
334+ *
335+ * This function outputs debug information about the overlay size and position,
336+ * and the table of contents position and size.
337+ *
338+ * @param lengthofPackage The length of the package extracted from the archive.
339+ */
340+ void PyInstArchive::debugOutput (uint32_t lengthofPackage) {
341+ std::cout << " [+] Length of package: " << lengthofPackage << " bytes" << std::endl;
342+ std::cout << " [DEBUG] overlaySize: " << overlaySize << std::endl;
343+ std::cout << " [DEBUG] overlayPos: " << overlayPos << std::endl;
344+ std::cout << " [DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
345+ std::cout << " [DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
346+ }
347+
348+ /* *
349+ * @brief Output debug information about the entry sizes in the CArchive.
350+ *
351+ * This function outputs debug information about the entry sizes in the CArchive, including the
352+ * name and compressed data size of each entry.
353+ */
354+ void PyInstArchive::debugEntrySizes () {
355+ std::cout << " [DEBUG] Entry sizes in the CArchive:" << std::endl;
356+ for (const auto & entry : tocList) {
357+ std::cout << " [DEBUG] Entry Name: " << entry.getName ()
358+ << " , Compressed Size: " << entry.getCompressedDataSize () << " bytes"
359+ << std::endl;
360+ }
361+ }
362+ #endif
363+
286364/* *
287365 * @brief Parses the Table of Contents (TOC) from the PyInstaller archive.
288366 *
0 commit comments