Skip to content

Commit 3da81ba

Browse files
committed
optimize performance
Update performance
1 parent 9e0f461 commit 3da81ba

File tree

2 files changed

+68
-99
lines changed

2 files changed

+68
-99
lines changed

PyInstaller-C++.vcxproj.filters

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@
1515
</Filter>
1616
</ItemGroup>
1717
<ItemGroup>
18-
<ClInclude Include="framework.h">
18+
<ClInclude Include="include\PyInstArchive.h">
1919
<Filter>Header Files</Filter>
2020
</ClInclude>
21-
<ClInclude Include="PyInstArchive.h">
21+
<ClInclude Include="include\zconf.h">
22+
<Filter>Header Files</Filter>
23+
</ClInclude>
24+
<ClInclude Include="include\zlib.h">
2225
<Filter>Header Files</Filter>
2326
</ClInclude>
2427
</ItemGroup>
2528
<ItemGroup>
26-
<ClCompile Include="Pyinstaller.cpp">
27-
<Filter>Source Files</Filter>
28-
</ClCompile>
29-
<ClCompile Include="pch.cpp">
29+
<ClCompile Include="src\Pyinstaller.cpp">
3030
<Filter>Source Files</Filter>
3131
</ClCompile>
3232
</ItemGroup>

src/Pyinstaller.cpp

Lines changed: 62 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -80,21 +80,30 @@ bool PyInstArchive::checkFile() {
8080
return false;
8181
}
8282

83-
while (true) {
83+
std::vector<char> buffer(searchChunkSize + MAGIC.size() - 1);
84+
85+
while (endPos >= MAGIC.size()) {
8486
uint64_t startPos = endPos >= searchChunkSize ? endPos - searchChunkSize : 0;
8587
size_t chunkSize = endPos - startPos;
86-
if (chunkSize < MAGIC.size()) {
87-
break;
88-
}
88+
8989
fPtr.seekg(startPos, std::ios::beg);
90-
std::vector<char> data(chunkSize);
91-
fPtr.read(data.data(), chunkSize);
90+
fPtr.read(buffer.data(), chunkSize);
91+
92+
for (size_t i = chunkSize; i < buffer.size(); ++i) {
93+
buffer[i] = buffer[i - chunkSize];
94+
}
95+
96+
for (size_t i = chunkSize; i-- > 0;) {
97+
if (std::memcmp(buffer.data() + i, MAGIC.c_str(), MAGIC.size()) == 0) {
98+
cookiePos = startPos + i;
99+
break;
100+
}
101+
}
92102

93-
auto offs = std::string(data.data(), chunkSize).rfind(MAGIC);
94-
if (offs != std::string::npos) {
95-
cookiePos = startPos + offs;
103+
if (cookiePos != -1) {
96104
break;
97105
}
106+
98107
endPos = startPos + MAGIC.size() - 1;
99108
if (startPos == 0) {
100109
break;
@@ -107,9 +116,9 @@ bool PyInstArchive::checkFile() {
107116
}
108117

109118
fPtr.seekg(cookiePos + PYINST20_COOKIE_SIZE, std::ios::beg);
110-
std::vector<char> buffer(64);
111-
fPtr.read(buffer.data(), 64);
112-
if (std::string(buffer.data(), 64).find("python") != std::string::npos) {
119+
std::vector<char> buffer64(64);
120+
fPtr.read(buffer64.data(), 64);
121+
if (std::string(buffer64.data(), 64).find("python") != std::string::npos) {
113122
std::cout << "[+] Pyinstaller version: 2.1+" << std::endl;
114123
pyinstVer = 21;
115124
}
@@ -121,6 +130,7 @@ bool PyInstArchive::checkFile() {
121130
return true;
122131
}
123132

133+
124134
/**
125135
* @brief Swaps the byte order of a 32-bit integer to correct endianness.
126136
*
@@ -150,62 +160,51 @@ bool PyInstArchive::getCArchiveInfo() {
150160
try {
151161
uint32_t lengthofPackage, toc, tocLen, pyver;
152162

153-
if (pyinstVer == 20) {
154-
fPtr.seekg(cookiePos, std::ios::beg);
155-
char buffer[PYINST20_COOKIE_SIZE];
156-
fPtr.read(buffer, PYINST20_COOKIE_SIZE);
157-
std::memcpy(&lengthofPackage, buffer + 8, 4);
158-
std::memcpy(&toc, buffer + 12, 4);
159-
std::memcpy(&tocLen, buffer + 16, 4);
160-
std::memcpy(&pyver, buffer + 20, 4);
163+
// Check for version and load relevant data
164+
fPtr.seekg(cookiePos, std::ios::beg);
165+
char buffer[PYINST21_COOKIE_SIZE]; // Use a single buffer to handle both versions if possible
166+
fPtr.read(buffer, (pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
167+
168+
// Directly read values from the buffer
169+
if (pyinstVer == 20 || pyinstVer == 21) {
170+
// Read and immediately swap bytes (combine reading and byte order correction in one step)
171+
lengthofPackage = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 8));
172+
toc = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 12));
173+
tocLen = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 16));
174+
pyver = swapBytes(*reinterpret_cast<uint32_t*>(buffer + 20));
161175
}
162-
else if (pyinstVer == 21) {
163-
fPtr.seekg(cookiePos, std::ios::beg);
164-
char buffer[PYINST21_COOKIE_SIZE];
165-
fPtr.read(buffer, PYINST21_COOKIE_SIZE);
166-
std::memcpy(&lengthofPackage, buffer + 8, 4);
167-
std::memcpy(&toc, buffer + 12, 4);
168-
std::memcpy(&tocLen, buffer + 16, 4);
169-
std::memcpy(&pyver, buffer + 20, 4);
170-
}
171-
172-
// Convert values to host byte order (correcting endianness)
173-
lengthofPackage = swapBytes(lengthofPackage);
174-
toc = swapBytes(toc);
175-
tocLen = swapBytes(tocLen);
176-
pyver = swapBytes(pyver);
177176

178-
if (pyver >= 100) {
179-
pymaj = pyver / 100;
180-
pymin = pyver % 100;
181-
}
182-
else {
183-
pymaj = pyver / 10;
184-
pymin = pyver % 10;
185-
}
177+
// Python version determination
178+
pymaj = pyver / (pyver >= 100 ? 100 : 10);
179+
pymin = pyver % (pyver >= 100 ? 100 : 10);
186180

187181
std::cout << "[+] Python version: " << static_cast<int>(pymaj) << "." << static_cast<int>(pymin) << std::endl;
188182

189-
uint64_t tailBytes = fileSize - cookiePos - (pyinstVer == 20 ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
183+
uint64_t tailBytes = fileSize - cookiePos - ((pyinstVer == 20) ? PYINST20_COOKIE_SIZE : PYINST21_COOKIE_SIZE);
190184
overlaySize = static_cast<uint64_t>(lengthofPackage) + tailBytes;
191185
overlayPos = fileSize - overlaySize;
192186
tableOfContentsPos = overlayPos + toc;
193187
tableOfContentsSize = tocLen;
194188

189+
#ifdef _DEBUG
195190
std::cout << "[+] Length of package: " << lengthofPackage << " bytes" << std::endl;
196191
std::cout << "[DEBUG] overlaySize: " << overlaySize << std::endl;
197192
std::cout << "[DEBUG] overlayPos: " << overlayPos << std::endl;
198193
std::cout << "[DEBUG] tableOfContentsPos: " << tableOfContentsPos << std::endl;
199194
std::cout << "[DEBUG] tableOfContentsSize: " << tableOfContentsSize << std::endl;
195+
#endif
200196

201-
parseTOC();
197+
parseTOC(); // Always included, regardless of the mode
202198

203-
std::cout << "[INFO] Entry sizes in the CArchive:" << std::endl;
199+
#ifdef _DEBUG
200+
std::cout << "[DEBUG] Entry sizes in the CArchive:" << std::endl;
204201
for (const auto& entry : tocList) {
205-
std::cout << "[INFO] Entry Name: " << entry.getName()
202+
std::cout << "[DEBUG] Entry Name: " << entry.getName()
206203
<< ", Compressed Size: " << entry.getCompressedDataSize() << " bytes"
207204
<< std::endl;
208205
}
206+
#endif
207+
209208
}
210209
catch (...) {
211210
std::cerr << "[!] Error: The file is not a PyInstaller archive" << std::endl;
@@ -214,6 +213,7 @@ bool PyInstArchive::getCArchiveInfo() {
214213
return true;
215214
}
216215

216+
217217
/**
218218
* @brief Parses the Table of Contents (TOC) from the PyInstaller archive.
219219
*
@@ -222,64 +222,55 @@ bool PyInstArchive::getCArchiveInfo() {
222222
* Each entry is stored in a list for further processing.
223223
*/
224224
void PyInstArchive::parseTOC() {
225-
226225
// Set the file pointer to the position of the Table of Contents
227226
fPtr.seekg(tableOfContentsPos, std::ios::beg);
228227

229228
tocList.clear(); // Clear any existing TOC entries
230229
uint32_t parsedLen = 0; // Initialize parsed length
231230

232-
// Continue parsing until the total size of the TOC is reached
231+
// Parse entries one at a time until the total size of the TOC is reached
233232
while (parsedLen < tableOfContentsSize) {
234233
uint32_t entrySize;
235-
fPtr.read(reinterpret_cast<char*>(&entrySize), sizeof(entrySize)); // Read the entry size
236-
entrySize = swapBytes(entrySize); // Convert entry size to host byte order
234+
fPtr.read(reinterpret_cast<char*>(&entrySize), sizeof(entrySize));
235+
if (fPtr.gcount() < sizeof(entrySize)) break; // Prevent reading beyond the file
237236

238-
// Debugging output for entry size
239-
std::cout << "[DEBUG] Entry Size: " << entrySize << ", Parsed Length: " << parsedLen << std::endl;
237+
entrySize = swapBytes(entrySize); // Convert entry size to host byte order
240238

241-
// Calculate the length of the name and allocate buffer
242239
uint32_t nameLen = sizeof(uint32_t) + sizeof(uint32_t) * 3 + sizeof(uint8_t) + sizeof(char);
243240
std::vector<char> nameBuffer(entrySize - nameLen); // Create buffer for the name
244241

245-
// Variables to hold entry information
242+
// Fixed-size fields of the entry; each one is read with its own call below
246243
uint32_t entryPos, cmprsdDataSize, uncmprsdDataSize;
247244
uint8_t cmprsFlag;
248245
char typeCmprsData;
249246

250-
// Read the other fields from the file
251247
fPtr.read(reinterpret_cast<char*>(&entryPos), sizeof(entryPos));
252248
fPtr.read(reinterpret_cast<char*>(&cmprsdDataSize), sizeof(cmprsdDataSize));
253249
fPtr.read(reinterpret_cast<char*>(&uncmprsdDataSize), sizeof(uncmprsdDataSize));
254250
fPtr.read(reinterpret_cast<char*>(&cmprsFlag), sizeof(cmprsFlag));
255251
fPtr.read(reinterpret_cast<char*>(&typeCmprsData), sizeof(typeCmprsData));
256-
fPtr.read(nameBuffer.data(), entrySize - nameLen);
257252

258-
// Debugging output for each field read
259-
std::cout << "[DEBUG] Entry Position: " << swapBytes(entryPos) << std::endl;
260-
std::cout << "[DEBUG] Compressed Data Size: " << swapBytes(cmprsdDataSize) << std::endl;
261-
std::cout << "[DEBUG] Uncompressed Data Size: " << swapBytes(uncmprsdDataSize) << std::endl;
262-
std::cout << "[DEBUG] Compression Flag: " << static_cast<int>(cmprsFlag) << std::endl;
263-
std::cout << "[DEBUG] Type of Compressed Data: " << typeCmprsData << std::endl;
253+
// Convert the 32-bit fields to host byte order
254+
entryPos = swapBytes(entryPos);
255+
cmprsdDataSize = swapBytes(cmprsdDataSize);
256+
uncmprsdDataSize = swapBytes(uncmprsdDataSize);
257+
258+
fPtr.read(nameBuffer.data(), entrySize - nameLen);
264259

265260
// Decode the name from the buffer and remove null characters
266261
std::string name(nameBuffer.data(), nameBuffer.size());
267262
name.erase(std::remove(name.begin(), name.end(), '\0'), name.end());
268263

269-
// Debugging output for the name
270-
std::cout << "[DEBUG] Name: '" << name << "'" << std::endl;
271-
272264
// Handle invalid names and normalize
273265
if (name.empty() || name[0] == '/') {
274266
name = "unnamed_" + std::to_string(parsedLen);
275-
std::cout << "[DEBUG] Normalized Name: '" << name << "'" << std::endl; // Debugging normalized name
276267
}
277268

278269
// Add the entry to the TOC list
279270
tocList.emplace_back(
280-
overlayPos + swapBytes(entryPos),
281-
swapBytes(cmprsdDataSize),
282-
swapBytes(uncmprsdDataSize),
271+
overlayPos + entryPos,
272+
cmprsdDataSize,
273+
uncmprsdDataSize,
283274
cmprsFlag,
284275
typeCmprsData,
285276
name
@@ -291,9 +282,9 @@ void PyInstArchive::parseTOC() {
291282

292283
// Output the total number of entries found in the TOC
293284
std::cout << "[+] Found " << tocList.size() << " files in CArchive" << std::endl;
294-
295285
}
296286

287+
297288
/**
298289
* @brief Displays the list of files in the PyInstaller archive.
299290
*
@@ -338,7 +329,6 @@ void PyInstArchive::timeExtractionProcess(const std::string& outputDir) {
338329
<< std::fixed << std::setprecision(2) << std::setw(5) << seconds << std::endl;
339330
}
340331

341-
342332
/**
343333
* @brief Decompresses and extracts a file from the PyInstaller archive to the specified output directory.
344334
*
@@ -406,27 +396,6 @@ void PyInstArchive::decompressAndExtractFile(const CTOCEntry& tocEntry, const st
406396
}
407397
}
408398

409-
/**
410-
* @brief Decompresses data using zlib.
411-
*
412-
* Decompresses `compressedData` into `decompressedData` using zlib.
413-
* Ensure `decompressedData` has enough space for the decompressed output.
414-
*
415-
* @param compressedData Input vector of compressed data.
416-
* @param decompressedData Output vector for decompressed data.
417-
*
418-
* @note Prints an error message if decompression fails.
419-
*/
420-
void PyInstArchive::decompressData(const std::vector<char>& compressedData, std::vector<char>& decompressedData) {
421-
uLongf decompressedSize = decompressedData.size();
422-
int result = uncompress(reinterpret_cast<Bytef*>(decompressedData.data()), &decompressedSize,
423-
reinterpret_cast<const Bytef*>(compressedData.data()), compressedData.size());
424-
425-
if (result != Z_OK) {
426-
std::cerr << "[!] Error: Decompression failed" << std::endl;
427-
// Optionally, you could also throw an exception or handle the error more specifically
428-
}
429-
}
430399

431400
/**
432401
* @brief Parses command-line arguments for interacting with a PyInstaller archive.

0 commit comments

Comments
 (0)