Skip to content

Commit 705630b

Browse files
committed
Implement sorting of computed SUM file entries when -threads is specified.
If the SUM file is being overwritten, we sort and rewrite the SUM file. Otherwise, we write the results of the sum computation to a shadow file, sort it when all operations finish, and finally append the sorted content to the target SUM file. The shadow file is deleted at the end.
1 parent 69f3ecd commit 705630b

File tree

1 file changed

+175
-13
lines changed

1 file changed

+175
-13
lines changed

DirHash.cpp

Lines changed: 175 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,18 @@ void ToHex(LPBYTE pbData, int iLen, LPTSTR szHex)
192192
*szHex = 0;
193193
}
194194

195+
void ToHex (const ByteArray& data, LPTSTR szHex)
196+
{
197+
unsigned char b;
198+
for (size_t i = 0; i < data.size(); i++)
199+
{
200+
b = data[i];
201+
*szHex++ = ToHex(b >> 4);
202+
*szHex++ = ToHex(b & 0x0F);
203+
}
204+
*szHex = 0;
205+
}
206+
195207
bool FromHex(TCHAR c, unsigned char& b)
196208
{
197209
if (c >= _T('0') && c <= _T('9'))
@@ -601,35 +613,69 @@ class CFilePtr
601613
{
602614
protected:
603615
FILE* m_pFile;
616+
FILE* m_pShadowFile;
617+
wstring m_fileName;
618+
wstring m_shadowFileName;
604619
// forbid copying
605-
CFilePtr(const CFilePtr&) : m_pFile(NULL)
620+
CFilePtr(const CFilePtr&) : m_pFile(NULL), m_pShadowFile(NULL), m_fileName(L""), m_shadowFileName(L"")
606621
{
607622

608623
}
624+
609625
CFilePtr& operator = (const CFilePtr&)
610626
{
611627
return *this;
612628
}
613629
public:
614-
CFilePtr() : m_pFile(NULL)
630+
CFilePtr() : m_pFile(NULL), m_pShadowFile(NULL)
615631
{
616632

617633
}
618634

619-
explicit CFilePtr(FILE* pFile) : m_pFile(pFile)
635+
636+
explicit CFilePtr(FILE* pFile, const wstring& name, FILE* pShadowFile, const wstring& shadowName)
637+
: m_pFile(pFile), m_pShadowFile(pShadowFile), m_fileName(name), m_shadowFileName(shadowName)
620638
{
621639

622640
}
623641

624642
~CFilePtr()
625643
{
626-
if (m_pFile)
627-
fclose(m_pFile);
644+
Close();
628645
}
629646

647+
const wstring& GetFileName() const { return m_fileName; }
648+
const wstring& GetShadowFileName() const { return m_shadowFileName; }
649+
650+
FILE* GetShadowFile() const { return m_pShadowFile; }
651+
630652
operator FILE* () const { return m_pFile; }
631653

632654
FILE* operator -> () const { return m_pFile; }
655+
656+
void CloseShadowFile()
657+
{
658+
if (m_pShadowFile)
659+
{
660+
fclose(m_pShadowFile);
661+
m_pShadowFile = NULL;
662+
}
663+
}
664+
665+
void Close()
666+
{
667+
if (m_pFile)
668+
{
669+
fclose(m_pFile);
670+
m_pFile = NULL;
671+
}
672+
673+
if (m_pShadowFile)
674+
{
675+
fclose(m_pShadowFile);
676+
m_pShadowFile = NULL;
677+
}
678+
}
633679
};
634680

635681

@@ -1339,7 +1385,7 @@ LPCTSTR GetShortFileName(LPCTSTR szFilePath, unsigned long long fileSize)
13391385
ptr++;
13401386

13411387
// calculate maximum length for file name
1342-
bufferSize = (g_originalConsoleInfo.dwSize.X > (maxPrintLen + 1)) ? min(256, (g_originalConsoleInfo.dwSize.X - 1 - maxPrintLen)) : 9;
1388+
bufferSize = (g_originalConsoleInfo.dwSize.X > (maxPrintLen + 1)) ? min(bufferSize, (g_originalConsoleInfo.dwSize.X - 1 - maxPrintLen)) : 9;
13431389

13441390
l = _tcslen(ptr);
13451391
if (l < bufferSize)
@@ -1507,7 +1553,7 @@ void AddHashJob(const CPath& filePath, ULONGLONG fileSize, bool bQuiet, bool bSh
15071553

15081554
void ProcessFile(HANDLE f, ULONGLONG fileSize, LPCTSTR szFilePath, bool bQuiet, bool bShowProgress, bool bSumMode, bool bSumVerificationMode, LPCBYTE pbExpectedDigest, vector<shared_ptr<Hash>>& pHashes, LPBYTE pbBuffer, size_t cbBuffer)
15091555
{
1510-
bShowProgress = !bQuiet && bShowProgress;
1556+
bShowProgress = !bQuiet && bShowProgress && !g_threadsCount; // no progress shown in case of multitheaded computation
15111557
unsigned long long currentSize = 0;
15121558
clock_t startTime = bShowProgress ? clock() : 0;
15131559
clock_t lastBlockTime = 0;
@@ -1518,17 +1564,17 @@ void ProcessFile(HANDLE f, ULONGLONG fileSize, LPCTSTR szFilePath, bool bQuiet,
15181564
{
15191565
currentSize += (unsigned long long) cbCount;
15201566
UpdateHashes(pHashes,pbBuffer, cbCount);
1521-
if (bShowProgress && !g_threadsCount)
1567+
if (bShowProgress)
15221568
DisplayProgress(szFileName, currentSize, fileSize, startTime, lastBlockTime);
15231569
if (currentSize == fileSize)
15241570
break;
15251571
}
15261572

1527-
if (bShowProgress && !g_threadsCount)
1528-
ClearProgress();
1529-
15301573
CloseHandle(f);
15311574

1575+
if (bShowProgress)
1576+
ClearProgress();
1577+
15321578
if (bSumMode)
15331579
{
15341580
if (bSumVerificationMode)
@@ -1635,7 +1681,14 @@ DWORD WINAPI OutputThreadCode(LPVOID pArg)
16351681
else
16361682
ShowWarningDirect(pConsole->c_str());
16371683
}
1638-
if (!pOutput->bSkipOutputFile) if (outputFiles[pOutput->nOutputFile]) _ftprintf(*outputFiles[pOutput->nOutputFile], L"%s", p->c_str());
1684+
if (!pOutput->bSkipOutputFile && outputFiles[pOutput->nOutputFile]) {
1685+
FILE* fTarget = *outputFiles[pOutput->nOutputFile];
1686+
// write to shadow file if it is enabled
1687+
FILE* fShadow = outputFiles[pOutput->nOutputFile]->GetShadowFile();
1688+
if (fShadow) fTarget = fShadow;
1689+
1690+
_ftprintf(fTarget, L"%s", p->c_str());
1691+
}
16391692
delete p;
16401693
if (bDeleteConsole) delete pConsole;
16411694
_aligned_free(pOutput);
@@ -2790,6 +2843,54 @@ bool ParseSumFile(const CPath& sumFile, map<wstring, HashResultEntry>& digestLis
27902843
return bRet;
27912844
}
27922845

2846+
bool SortSumFile(const CPath& sumFile, FILE* fTarget)
2847+
{
2848+
map<wstring, HashResultEntry> digestList;
2849+
vector<int> skippedLines;
2850+
bool bRet = false;
2851+
2852+
// Parse the existing SHASUM file
2853+
if (ParseSumFile(sumFile, digestList, skippedLines))
2854+
{
2855+
// Convert map to vector for sorting
2856+
vector<pair<wstring, HashResultEntry>> sortedEntries(digestList.begin(), digestList.end());
2857+
2858+
// Sort the entries based on file path
2859+
sort(sortedEntries.begin(), sortedEntries.end(),
2860+
[](const auto& a, const auto& b) { return _wcsicmp(a.first.c_str(), b.first.c_str()) < 0; });
2861+
2862+
// write the sorted entries to the file
2863+
bool opendedNewFile = false;
2864+
if (!fTarget)
2865+
{
2866+
fTarget = _wfopen(sumFile.GetAbsolutPathValue().c_str(), L"wt,ccs=UTF-8");
2867+
opendedNewFile = true;
2868+
}
2869+
2870+
if (fTarget)
2871+
{
2872+
WCHAR szDigestHex[129]; // enough for 64 bytes digest
2873+
for (const auto& entry : sortedEntries)
2874+
{
2875+
wstring szLine;
2876+
ToHex(entry.second.m_digest, szDigestHex);
2877+
szLine = szDigestHex;
2878+
szLine += L" ";
2879+
szLine += entry.first;
2880+
szLine += L"\n";
2881+
_ftprintf(fTarget, L"%s", szLine.c_str());
2882+
}
2883+
2884+
if (opendedNewFile)
2885+
fclose(fTarget);
2886+
2887+
bRet = true;
2888+
}
2889+
}
2890+
2891+
return bRet;
2892+
}
2893+
27932894

27942895
BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
27952896
{
@@ -3247,10 +3348,12 @@ int _tmain(int argc, _TCHAR* argv[])
32473348
// in case of sum mode and if there are multiple hash algorithms specified, we need to create a separate file for each hash algorithm
32483349
// the file name will be the same as the output file name, but with the hash algorithm appended
32493350
bool bMultiHashMode = bSumMode && pHashes.size() > 1;
3351+
bool bSumComputation = bSumMode && !bVerifyMode;
32503352
for (size_t i = 0; i < pHashes.size(); i++)
32513353
{
32523354
// create a new file name by appending the hash algorithm name
32533355
std::wstring newFileName = g_outputFileName.GetAbsolutPathValue();
3356+
std::wstring shadowFileName;
32543357
if (bMultiHashMode)
32553358
{
32563359
newFileName += _T(".");
@@ -3273,9 +3376,19 @@ int _tmain(int argc, _TCHAR* argv[])
32733376
_ftprintf(newFile, L"\n");
32743377
}
32753378

3379+
FILE* shadowFile = NULL;
3380+
if (bSumComputation && bUseThreads && !bOverwrite)
3381+
{
3382+
// create a shadow file for the current sum file. This file will be used to store the hash values.
3383+
// when all computations are done, we will first sort the hash values and then write the sorted values to the target sum file
3384+
// this is done to avoid issues with the order of hash values in the sum file when using threads
3385+
shadowFileName = newFileName + L".dirhash_shadow";
3386+
shadowFile = _tfopen(shadowFileName.c_str(), _T("wt,ccs=UTF-8"));
3387+
}
3388+
32763389
if (newFile)
32773390
// add the file to the list of output files
3278-
outputFiles.push_back(shared_ptr<CFilePtr>(new CFilePtr(newFile)));
3391+
outputFiles.push_back(shared_ptr<CFilePtr>(new CFilePtr(newFile, newFileName, shadowFile, shadowFileName)));
32793392
else
32803393
outputFiles.push_back(NULL);
32813394

@@ -3608,6 +3721,55 @@ int _tmain(int argc, _TCHAR* argv[])
36083721
}
36093722

36103723
}
3724+
else
3725+
{
3726+
// Sort the entries of each sum file in case of multithreaded mode
3727+
// for this, we loop over outputFiles elements and for each non NULL element, we sort it by calling SortSumFile
3728+
if (bUseThreads)
3729+
{
3730+
for (size_t i = 0; i < outputFiles.size(); i++)
3731+
{
3732+
if (outputFiles[i])
3733+
{
3734+
FILE* pShadowFile = outputFiles[i]->GetShadowFile();
3735+
if (pShadowFile)
3736+
{
3737+
// close the shadow file
3738+
outputFiles[i]->CloseShadowFile();
3739+
// sort its content and write it to the target file
3740+
CPath shadowFilePath(outputFiles[i]->GetShadowFileName().c_str());
3741+
FILE* pFile = *outputFiles[i];
3742+
if (!SortSumFile(shadowFilePath, pFile))
3743+
{
3744+
if (!bQuiet)
3745+
{
3746+
ShowError(_T("Failed to parse and write entries from the shadow file \"%s\".\n"), shadowFilePath.GetPathValue().c_str());
3747+
}
3748+
}
3749+
else
3750+
{
3751+
// delete shadow file
3752+
DeleteFile(shadowFilePath.GetAbsolutPathValue().c_str());
3753+
}
3754+
}
3755+
else
3756+
{
3757+
// close the file
3758+
outputFiles[i]->Close();
3759+
// sort its content and overwrite it with the sorted content
3760+
CPath filePath(outputFiles[i]->GetFileName().c_str());
3761+
if (!SortSumFile(filePath, NULL))
3762+
{
3763+
if (!bQuiet)
3764+
{
3765+
ShowError(_T("Failed to parse and write entries from the file \"%s\".\n"), filePath.GetPathValue().c_str());
3766+
}
3767+
}
3768+
}
3769+
}
3770+
}
3771+
}
3772+
}
36113773
}
36123774
else
36133775
{

0 commit comments

Comments
 (0)