Skip to content

Commit 791258f

Browse files
[Refactor:Plagiarism] Only print truncation warning once (#73)
* Only print warning once
* add number of times it was truncated
1 parent 1fe7878 commit 791258f

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

compare_hashes/compare_hashes.cpp

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,9 @@ int main(int argc, char* argv[]) {
161161
std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
162162
// keeps track of max matching hashes across all submissions, used for calculation of ranking score
163163
unsigned int max_hashes_matched = 0;
164+
// a map of "user_id:version" strings to the non-zero number of times their matching positions array was truncated
165+
std::unordered_map<std::string, int> matching_positions_truncations;
166+
164167

165168
time_t start, end;
166169
time(&start);
@@ -383,8 +386,7 @@ int main(int argc, char* argv[]) {
383386
others.push_back(other);
384387

385388
if (matchingpositions.size() >= lichen_config["max_matching_positions"]) {
386-
std::cout << "Matching positions array truncated for user: [" << other["username"] << "] version: " << other["version"] << std::endl;
387-
std::cout << " - Try increasing the hash size to fix this problem." << std::endl;
389+
matching_positions_truncations[std::string(other["username"]) + std::string(":") + std::to_string(other["version"].get<int>())]++;
388390
break;
389391
}
390392

@@ -554,7 +556,7 @@ int main(int argc, char* argv[]) {
554556
if (max_hashes_matched < totalMatchingHashes) {
555557
max_hashes_matched = totalMatchingHashes;
556558
}
557-
559+
558560
std::pair<version_number, Score> new_pair = {(*submission_itr)->version(), submission_score};
559561
highest_matches[(*submission_itr)->student()].push_back(new_pair);
560562
// =========================================================================
@@ -587,13 +589,23 @@ int main(int argc, char* argv[]) {
587589
my_counter++;
588590
if (int((my_counter / float(all_submissions.size())) * 100) > my_percent) {
589591
my_percent = int((my_counter / float(all_submissions.size())) * 100);
590-
std::cout << "hash walk: " << my_percent << "% complete" << std::endl;
592+
std::cout << "Processing submissions: " << my_percent << "% complete" << std::endl;
591593
}
592594
}
593595

594596
time(&end);
595597
diff = difftime(end, start);
596-
std::cout << "finished walking in " << diff << " seconds" << std::endl;
598+
std::cout << "Finished processing submissions in " << diff << " seconds" << std::endl;
599+
600+
// Print out the list of users who had their matching positions array truncated
601+
if (matching_positions_truncations.size() > 0) {
602+
std::cout << "Matching positions array truncated for user(s): ";
603+
for (std::unordered_map<std::string, int>::const_iterator itr = matching_positions_truncations.begin();
604+
itr != matching_positions_truncations.end(); itr++) {
605+
std::cout << itr->first << " (" << itr->second << "), ";
606+
}
607+
std::cout << std::endl << " - Try increasing the hash size or adding a regex to fix this problem." << std::endl;
608+
}
597609

598610
// ===========================================================================
599611
// Create a general summary of rankings of users by percentage match

0 commit comments

Comments
 (0)