Skip to content

Commit 84ef5c2

Browse files
committed
improve D-proof candidate amount estimation
1 parent 275a2e2 commit 84ef5c2

File tree

3 files changed

+120
-15
lines changed

3 files changed

+120
-15
lines changed

main.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ int main(int argc, char* argv[]) { // argc = 1 + N, argv = { <command>, <arg1>,
6565
};
6666
#if 0 // default command
6767
if (argc <= 1) {
68+
//#bool redundantSchemaRemoval = false, withConclusions = true;
69+
//#DlProofEnumerator::countNextIterationAmount(redundantSchemaRemoval, withConclusions);
70+
//#return 0;
6871
//#static vector<string> customCmd = FctHelper::stringSplit("pmGenerator -g 19 -g 21 -u -r data/pmproofs-old.txt data/pmproofs-reducer.txt -a SD data/pmproofs-reducer.txt data/pmproofs-old.txt data/pmproofs-result-all.txt -l -w", " ");
6972
//#static vector<string> customCmd = FctHelper::stringSplit("pmGenerator -g 19 -g 21 -u -r data/pmproofs-old.txt data/pmproofs-reducer.txt -a SD data/pmproofs-reducer.txt data/pmproofs-old.txt data/pmproofs-result-styleAll-all.txt -s -l -w", " ");
7073
//#static vector<string> customCmd = FctHelper::stringSplit("pmGenerator -g 19 -g 21 -u -r data/pmproofs-old.txt data/pmproofs-reducer.txt -a SD data/pmproofs-reducer.txt data/pmproofs-old.txt data/pmproofs-result-modifiedOnly.txt -w", " ");

nortmann/DlProofEnumerator.cpp

Lines changed: 114 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,72 @@ tbb::concurrent_unordered_map<string, string> DlProofEnumerator::connectDProofCo
362362
return representativeProofs;
363363
}
364364

365+
void DlProofEnumerator::countNextIterationAmount(bool redundantSchemaRemoval, bool withConclusions) {
366+
chrono::time_point<chrono::steady_clock> startTime;
367+
368+
// 1. Load representative D-proof strings.
369+
time_t time = chrono::system_clock::to_time_t(chrono::system_clock::now());
370+
cout << strtok(ctime(&time), "\n") << ": Next iteration amount counter started. [parallel ; " << thread::hardware_concurrency() << " hardware thread contexts" << (redundantSchemaRemoval ? "" : ", unfiltered") << "]" << endl;
371+
string filePrefix = withConclusions ? "data/dProofs-withConclusions/dProofs" : "data/dProofs-withoutConclusions/dProofs";
372+
string filePostfix = ".txt";
373+
vector<vector<string>> allRepresentatives;
374+
vector<vector<string>> allConclusions;
375+
uint64_t allRepresentativesCount;
376+
uint32_t wordLengthLimit;
377+
if (!loadDProofRepresentatives(allRepresentatives, withConclusions ? &allConclusions : nullptr, &allRepresentativesCount, &wordLengthLimit, true, filePrefix, filePostfix))
378+
return;
379+
uint32_t unfilteredStart = 0;
380+
if (!redundantSchemaRemoval) {
381+
unfilteredStart = wordLengthLimit;
382+
filePostfix = "-unfiltered" + to_string(wordLengthLimit) + "+.txt";
383+
if (!loadDProofRepresentatives(allRepresentatives, withConclusions ? &allConclusions : nullptr, &allRepresentativesCount, &wordLengthLimit, true, filePrefix, filePostfix, false))
384+
return;
385+
}
386+
387+
// 2. Initialize and prepare progress data.
388+
bool showProgress = allRepresentatives.size() > 15;
389+
ProgressData parseProgress = showProgress ? ProgressData(allRepresentatives.size() > 27 ? 5 : allRepresentatives.size() > 25 ? 10 : 20, allRepresentativesCount) : ProgressData();
390+
391+
// 3. Prepare representative proofs that are already known addressable by conclusions, for filtering.
392+
startTime = chrono::steady_clock::now();
393+
tbb::concurrent_unordered_map<string, string> representativeProofs;
394+
representativeProofs = withConclusions ? connectDProofConclusions(allRepresentatives, allConclusions, showProgress ? &parseProgress : nullptr) : parseDProofRepresentatives(allRepresentatives, showProgress ? &parseProgress : nullptr);
395+
cout << FctHelper::durationStringMs(chrono::duration_cast<chrono::microseconds>(chrono::steady_clock::now() - startTime)) << " total " << (withConclusions ? "" : "parse, conversion & ") << "insertion duration." << endl;
396+
397+
// 4. Iterate and count candidates of length 'wordLengthLimit'.
398+
time = chrono::system_clock::to_time_t(chrono::system_clock::now());
399+
cout << strtok(ctime(&time), "\n") << ": Starting to iterate D-proof candidates of length " << wordLengthLimit << "." << endl;
400+
uint64_t counter;
401+
const vector<uint32_t> stack = { wordLengthLimit }; // do not generate all words up to a certain length, but only of length 'wordLengthLimit' ; NOTE: Uses nonterminal 'A' as lower limit 'wordLengthLimit' in combination with upper limit 'wordLengthLimit'.
402+
const unsigned knownLimit = wordLengthLimit - 2;
403+
auto _iterateRepresentatives = [&]() -> uint64_t {
404+
atomic<uint64_t> counter = 0;
405+
processCondensedDetachmentPlProofs_generic(stack, wordLengthLimit, knownLimit, allRepresentatives, [&counter](string& sequence) { counter++; });
406+
return counter;
407+
};
408+
startTime = chrono::steady_clock::now();
409+
counter = _iterateRepresentatives();
410+
cout << FctHelper::durationStringMs(chrono::duration_cast<chrono::microseconds>(chrono::steady_clock::now() - startTime)) << " taken to iterate " << counter << " condensed detachment proof strings of length " << wordLengthLimit << "." << endl;
411+
// e.g. 17 : 11.54 ms taken to iterate 31388
412+
// 19 : 42.47 ms taken to iterate 94907
413+
// 21 : 98.47 ms taken to iterate 290392
414+
// 23 : 280.59 ms taken to iterate 886041
415+
// 25 : 897.72 ms taken to iterate 2709186
416+
// 27 : 3248.31 ms ( 3 s 248.31 ms) taken to iterate 8320672
417+
// 29 : 9294.50 ms ( 9 s 294.50 ms) taken to iterate 25589216
418+
// 29-unfiltered27+: 10765.89 ms ( 10 s 765.88 ms) taken to iterate 27452198
419+
// 29-unfiltered25+: 12193.00 ms ( 12 s 193.00 ms) taken to iterate 30038660
420+
// 29-unfiltered23+: 12953.30 ms ( 12 s 953.30 ms) taken to iterate 32772266
421+
// 29-unfiltered21+: 14458.88 ms ( 14 s 458.88 ms) taken to iterate 36185400
422+
// 29-unfiltered19+: 15760.71 ms ( 15 s 760.71 ms) taken to iterate 40243692
423+
// 29-unfiltered17+: 19397.85 ms ( 19 s 397.85 ms) taken to iterate 44934432
424+
// 31 : 35628.12 ms ( 35 s 628.12 ms) taken to iterate 78896376 ; 78896376 / 44934432 ≈ 1.75581 ; 35628.12 / 19397.85 ≈ 1.83670
425+
// 33-unfiltered31+: 106942.91 ms (1 min 46 s 942.90 ms) taken to iterate 260604052 ; 260604052 / 78896376 ≈ 3.30312 ; 106942.91 / 35628.12 ≈ 3.00164
426+
cout << "[Copy] Next iteration count (" << (redundantSchemaRemoval || unfilteredStart == wordLengthLimit ? "filtered" : "unfiltered" + to_string(unfilteredStart) + "+") << "): { " << wordLengthLimit << ", " << counter << " }" << endl;
427+
time = chrono::system_clock::to_time_t(chrono::system_clock::now());
428+
cout << strtok(ctime(&time), "\n") << ": Next iteration amount counter complete. [parallel ; " << thread::hardware_concurrency() << " hardware thread contexts" << (redundantSchemaRemoval ? "" : ", unfiltered") << "]" << endl;
429+
}
430+
365431
void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool redundantSchemaRemoval, bool withConclusions) { // NOTE: More debug code & performance results available before https://github.com/deontic-logic/proof-tool/commit/45627054d14b6a1e08eb56eaafcf7cf202f2ab96 ; representation of formulas as tree structures before https://github.com/xamidi/pmGenerator/commit/63c7f17b82d56ec639f2b843b688d3e9a0a2a077
366432
chrono::time_point<chrono::steady_clock> startTime;
367433

@@ -403,7 +469,9 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
403469
// 27 : 1149058
404470
// 29 : 3449251
405471
// 5181578 representatives in total.
472+
uint32_t unfilteredStart = 0;
406473
if (!redundantSchemaRemoval) {
474+
unfilteredStart = start;
407475
filePostfix = "-unfiltered" + to_string(start) + "+.txt";
408476
if (!loadDProofRepresentatives(allRepresentatives, withConclusions ? &allConclusions : nullptr, &allRepresentativesCount, &start, true, filePrefix, filePostfix, false))
409477
return;
@@ -420,7 +488,7 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
420488
ProgressData findProgress;
421489
ProgressData removalProgress;
422490

423-
// 3. Prepare representative proofs that are already known addressable by conclusions, for filtering. To find the conclusions, parse all loaded D-proofs.
491+
// 3. Prepare representative proofs that are already known addressable by conclusions, for filtering.
424492
startTime = chrono::steady_clock::now();
425493
tbb::concurrent_unordered_map<string, string> representativeProofs;
426494
representativeProofs = withConclusions ? connectDProofConclusions(allRepresentatives, allConclusions, showProgress ? &parseProgress : nullptr) : parseDProofRepresentatives(allRepresentatives, showProgress ? &parseProgress : nullptr);
@@ -435,19 +503,42 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
435503
// 29: 741236.66 ms (12 min 21 s 236.66 ms) total parse, conversion & insertion duration. | 877.29 ms total insertion duration.
436504
//#cout << "Done loading (measure memory requirement)." << endl; while (true);
437505

438-
// 4. Compute and store new representatives indefinitely.
506+
// 4. Compute and store new representatives.
507+
map<uint32_t, uint64_t> iterationCounts;
439508
for (uint32_t wordLengthLimit = start; wordLengthLimit <= limit; wordLengthLimit += 2) {
440509

441510
// 4.1 Prepare progress data.
442511
showProgress = wordLengthLimit >= 17;
443512
// NOTE: The following maps are built dynamically and may contain gaps, in which case earlier
444513
// values are used to approximate the exponential growth rate, based on which new values
445514
// are approximated in order to estimate ongoing progress of unknown scale.
446-
static map<uint32_t, uint64_t> iterationCounts_builtin = { { 1, 3 }, { 3, 9 }, { 5, 36 }, { 7, 108 }, { 9, 372 }, { 11, 1134 }, { 13, 3354 }, { 15, 10360 }, { 17, 31388 } };
447-
static map<uint32_t, uint64_t> iterationCounts = { { 1, 3 }, { 3, 9 }, { 5, 36 }, { 7, 108 }, { 9, 372 }, { 11, 1134 }, { 13, 3354 }, { 15, 10360 }, { 17, 31388 }, { 19, 94907 }, { 21, 290392 }, { 23, 886041 }, { 25, 2709186 }, { 27, 8320672 }, { 29, 25589216 }, };
448-
static map<uint32_t, uint64_t> removalCounts = { { 1, 0 }, { 3, 0 }, { 5, 3 }, { 7, 6 }, { 9, 24 }, { 11, 59 }, { 13, 171 }, { 15, 504 }, { 17, 1428 }, { 19, 4141 }, { 21, 12115 }, { 23, 35338 }, { 25, 104815 }, { 27, 310497 }, { 29, 926015 }, };
515+
static map<uint32_t, uint64_t> iterationCounts_filtered = { { 1, 3 }, { 3, 9 }, { 5, 36 }, { 7, 108 }, { 9, 372 }, { 11, 1134 }, { 13, 3354 }, { 15, 10360 }, { 17, 31388 }, { 19, 94907 }, { 21, 290392 }, { 23, 886041 }, { 25, 2709186 }, { 27, 8320672 }, { 29, 25589216 }, { 31, 78896376 } };
516+
static map<uint32_t, map<uint32_t, uint64_t>> iterationCounts_unfiltered = {
517+
{ 17, { { 19, 103475 }, { 21, 350830 }, { 23, 1173825 }, { 25, 3951918 }, { 27, 13339002 }, { 29, 44934432 } } },
518+
{ 19, { { 21, 315238 }, { 23, 1061733 }, { 25, 3546684 }, { 27, 11942738 }, { 29, 40243692 } } },
519+
{ 21, { { 23, 958731 }, { 25, 3221706 }, { 27, 10765954 }, { 29, 36185400 } } },
520+
{ 23, { { 25, 2921214 }, { 27, 9822130 }, { 29, 32772266 } } },
521+
{ 25, { { 27, 8949562 }, { 29, 30038660 } } },
522+
{ 27, { { 29, 27452198 }, { 31, 92103906 } } },
523+
{ 29, { { 31, 84452466 }, { 33, 283384726 } } },
524+
{ 31, { { 33, 260604052 }, { 35, 874253765 } } },
525+
};
526+
static map<uint32_t, uint64_t> removalCounts = { { 1, 0 }, { 3, 0 }, { 5, 3 }, { 7, 6 }, { 9, 24 }, { 11, 59 }, { 13, 171 }, { 15, 504 }, { 17, 1428 }, { 19, 4141 }, { 21, 12115 }, { 23, 35338 }, { 25, 104815 }, { 27, 310497 }, { 29, 926015 } };
527+
if (iterationCounts.empty()) {
528+
if (redundantSchemaRemoval)
529+
iterationCounts = iterationCounts_filtered;
530+
else {
531+
for (const pair<const uint32_t, uint64_t>& p : iterationCounts_filtered)
532+
if (p.first <= unfilteredStart)
533+
iterationCounts.insert(p);
534+
else
535+
break;
536+
map<uint32_t, uint64_t>& counts = iterationCounts_unfiltered[unfilteredStart];
537+
iterationCounts.insert(counts.begin(), counts.end());
538+
}
539+
}
449540
if (showProgress) {
450-
auto determineCountingLimit = [&wordLengthLimit](uint64_t& count, const map<uint32_t, uint64_t>& counts) -> bool {
541+
auto determineCountingLimit = [&wordLengthLimit](uint64_t& count, const map<uint32_t, uint64_t>& counts, bool iteration) -> bool {
451542
map<uint32_t, uint64_t>::const_iterator itIterationCount = counts.find(wordLengthLimit);
452543
if (itIterationCount == counts.end()) {
453544
map<uint32_t, uint64_t>::const_iterator itLastKnown = prev(counts.end());
@@ -461,22 +552,25 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
461552
itPrevLastKnown = prev(itPrevLastKnown);
462553
}
463554
double lastKnownGrowth = static_cast<double>(itLastKnown->second) / static_cast<double>(itPrevLastKnown->second);
464-
double estimatedLimit = static_cast<double>(lastKnownCount);
465-
for (uint32_t i = lastKnownLimit; i < wordLengthLimit; i += 2)
466-
estimatedLimit *= lastKnownGrowth;
555+
uint32_t exp = (wordLengthLimit - lastKnownLimit) / 2;
556+
double estimatedLimit = static_cast<double>(lastKnownCount) * pow(lastKnownGrowth, exp);
467557
count = static_cast<uint64_t>(estimatedLimit);
558+
cout << "Estimated " << (iteration ? "iteration" : "removal") << " count set to " << count << ", based on last known pair (" << itPrevLastKnown->first << ":" << itPrevLastKnown->second << ", " << itLastKnown->first << ":" << itLastKnown->second << ") with " << itLastKnown->second << "/" << itPrevLastKnown->second << "" << lastKnownGrowth << " and " << itLastKnown->second << " * (" << itLastKnown->second << "/" << itPrevLastKnown->second << ")^" << (wordLengthLimit - lastKnownLimit) / 2 << "" << FctHelper::round(estimatedLimit, 2) << "." << endl;
468559
return true;
469560
} else {
470561
count = itIterationCount->second;
562+
cout << "Known " << (iteration ? "iteration" : "removal") << " count loaded from " << itIterationCount->first << ":" << itIterationCount->second << "." << endl;
471563
return false;
472564
}
473565
};
474566
uint64_t iterationCount;
475-
bool iterationCountEstimated = determineCountingLimit(iterationCount, redundantSchemaRemoval ? iterationCounts : iterationCounts_builtin);
567+
bool iterationCountEstimated = determineCountingLimit(iterationCount, iterationCounts, true);
476568
findProgress = ProgressData(wordLengthLimit >= 27 ? 2 : wordLengthLimit >= 25 ? 5 : wordLengthLimit >= 23 ? 10 : 20, iterationCount, iterationCountEstimated);
477-
uint64_t removalCount;
478-
bool removalCountEstimated = determineCountingLimit(removalCount, removalCounts);
479-
removalProgress = ProgressData(wordLengthLimit >= 23 ? 2 : wordLengthLimit >= 21 ? 5 : wordLengthLimit >= 19 ? 10 : 20, removalCount, removalCountEstimated);
569+
if (redundantSchemaRemoval) {
570+
uint64_t removalCount;
571+
bool removalCountEstimated = determineCountingLimit(removalCount, removalCounts, false);
572+
removalProgress = ProgressData(wordLengthLimit >= 23 ? 2 : wordLengthLimit >= 21 ? 5 : wordLengthLimit >= 19 ? 10 : 20, removalCount, removalCountEstimated);
573+
}
480574
}
481575

482576
time = chrono::system_clock::to_time_t(chrono::system_clock::now());
@@ -501,8 +595,12 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
501595
// 29: 3187900.65 ms (53 min 7 s 900.65 ms) taken to collect 4375266 [...] ; 3187900.65 / 916905.86 ≈ 3.47680
502596

503597
// 4.3 Update iteration progress information.
504-
(redundantSchemaRemoval ? iterationCounts : iterationCounts_builtin).emplace(wordLengthLimit, counter);
505-
//#cout << "Updated iterationCounts: " << FctHelper::mapString((redundantSchemaRemoval ? iterationCounts : iterationCounts_builtin)) << endl;
598+
iterationCounts.emplace(wordLengthLimit, counter);
599+
(redundantSchemaRemoval ? iterationCounts_filtered : (wordLengthLimit <= unfilteredStart ? iterationCounts_filtered : iterationCounts_unfiltered[unfilteredStart])).emplace(wordLengthLimit, counter); // also save progress statically for potential subsequent generations
600+
//#cout << "Updated iterationCounts: " << FctHelper::mapString(iterationCounts) << ", static entry: " << FctHelper::mapString(redundantSchemaRemoval ? iterationCounts_filtered : iterationCounts_unfiltered[unfilteredStart]) << endl;
601+
cout << "[Copy] Static filtered iteration counts: " << FctHelper::mapStringF(iterationCounts_filtered, [](const pair<const uint32_t, uint64_t>& p) { return "{ " + to_string(p.first) + ", " + to_string(p.second) + " }"; }, "{ ", " }") << endl;
602+
if (!redundantSchemaRemoval)
603+
cout << "[Copy] Static unfiltered iteration counts: { " << unfilteredStart << ", " << FctHelper::mapStringF(iterationCounts_unfiltered[unfilteredStart], [](const pair<const uint32_t, uint64_t>& p) { return "{ " + to_string(p.first) + ", " + to_string(p.second) + " }"; }, "{ ", " }") << " }," << endl;
506604

507605
// 4.4 Remove new proofs with redundant conclusions.
508606
// NOTE: For a few steps more to not take ages (but still get all minimal D-proofs up to a certain length), one can skip the time-intensive filtering below and then
@@ -533,6 +631,7 @@ void DlProofEnumerator::generateDProofRepresentativeFiles(uint32_t limit, bool r
533631
// 4.5 Update removal progress information.
534632
removalCounts.emplace(wordLengthLimit, oldRepresentativeCounter - representativeCounter);
535633
//#cout << "Updated removalCounts: " << FctHelper::mapString(removalCounts) << endl;
634+
cout << "[Copy] Static filtered removal counts: " << FctHelper::mapStringF(removalCounts, [](const pair<const uint32_t, uint64_t>& p) { return "{ " + to_string(p.first) + ", " + to_string(p.second) + " }"; }, "{ ", " }") << endl;
536635
}
537636

538637
// 4.6 Order and output information.

nortmann/DlProofEnumerator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ struct DlProofEnumerator {
3333
static bool readRepresentativesLookupVectorFromFiles_par(std::vector<std::vector<std::string>>& allRepresentativesLookup, std::vector<std::vector<std::string>>* optOut_allConclusionsLookup, bool debug = false, unsigned concurrencyCount = std::thread::hardware_concurrency(), const std::string& filePrefix = "data/dProofs", const std::string& filePostfix = ".txt", bool initFresh = true, std::size_t containerReserve = 100);
3434
static std::vector<std::pair<std::array<std::uint32_t, 2>, unsigned>> proofLengthCombinations(std::uint32_t knownLimit);
3535

36+
// Data prediction
37+
static void countNextIterationAmount(bool redundantSchemaRemoval = true, bool withConclusions = true);
38+
3639
// Data generation
3740
// Summarized, this shared memory parallelism utilizing function is able to generate D-proofs without redundant conclusions ('dProofs<k>.txt')_{k<=n} (where time quickly becomes a limiting factor),
3841
// and subsequently it can generate D-proofs with redundant conclusions ('dProofs<m>-unfiltered<n+1>+.txt')_{m>n} (where space becomes a limiting factor).

0 commit comments

Comments
 (0)