1717#include " lichen_config.h"
1818#include " submission.h"
1919#include " hash_location.h"
20- #include " score.h"
2120
2221
2322// =============================================================================
@@ -29,20 +28,6 @@ typedef std::string user_id;
2928typedef unsigned int version_number;
3029
3130
32- // =============================================================================
33- // helper classes
34-
35-
36- // represents an element in a ranking of students by percent match
37- struct StudentRanking {
// Copies the four arguments into the members below. Note that v is a plain int
// while the member it initializes is a version_number (unsigned int typedef).
38- StudentRanking (const user_id &id, int v, const std::string &sg, const Score &s) : student(id), version(v), source_gradeable(sg), score(s) {}
// id of the ranked student
39- user_id student;
// version number paired with that student
40- version_number version;
// label supplied by the caller; presumably the gradeable the match came from -- TODO confirm
41- std::string source_gradeable;
// score used to order rankings; Score is declared outside this view
42- Score score;
43- };
44-
45-
4631// =============================================================================
4732// helper functions
4833
@@ -89,12 +74,6 @@ void incrementEndPositionsForMatches(nlohmann::json &others) {
8974}
9075
9176
92- bool ranking_sorter (const StudentRanking &a, const StudentRanking &b) {
// Comparator handed to std::sort: returns true when a must be placed before b.
// A higher score sorts first; equal scores fall back to ascending student id.
// Relies on Score providing operator> and operator== (declared outside this view).
93- return a.score > b.score ||
94- (a.score == b.score && a.student < b.student );
95- }
96-
97-
9877// =============================================================================
9978// MAIN
10079
@@ -157,10 +136,6 @@ int main(int argc, char* argv[]) {
157136 std::unordered_set<hash> provided_code;
158137 // stores all hashes from other gradeables
159138 std::unordered_map<hash, std::unordered_map<user_id, std::vector<HashLocation>>> other_gradeables;
160- // stores the matches for every student, used later for generating overall_rankings.txt
161- std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
162- // keeps track of max matching hashes across all submissions, used for calculation of ranking score
163- unsigned int max_hashes_matched = 0 ;
164139 // a map of "user_id:version" strings to the non-zero number of times their matching positions array was truncated
165140 std::unordered_map<std::string, int > matching_positions_truncations;
166141
@@ -323,7 +298,7 @@ int main(int argc, char* argv[]) {
323298
324299 // Note: we DO look for matches across submissions of the same student for self-plagiarism
325300
326- // save the locations of all other occurences from proir term submissions
301+ // save the locations of all other occurrences from prior term submissions
327302 std::vector<HashLocation>::iterator itr = other_occurences_itr->second .begin ();
328303 for (; itr != other_occurences_itr->second .end (); ++itr) {
329304 (*submission_itr)->addSuspiciousMatch (hash_itr->second , *itr, hash_itr->first );
@@ -515,80 +490,14 @@ int main(int argc, char* argv[]) {
515490 assert (ostr.good ());
516491 ostr << match_data.dump (4 ) << std::endl;
517492
518- // =========================================================================
519- // create individual ranking file
520- // the file contains all the other students share matches, sorted by decreasing order of the percent match
521-
522- // find and sort the other submissions it matches with
523- std::vector<StudentRanking> student_ranking;
524- std::unordered_map<std::string, std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>> matches = (*submission_itr)->getStudentsMatched ();
525-
526- std::unordered_map<std::string, std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>>::const_iterator gradeables_itr = matches.begin ();
527- for (; gradeables_itr != matches.end (); ++gradeables_itr) {
528- for (std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>::const_iterator matches_itr = gradeables_itr->second .begin ();
529- matches_itr != gradeables_itr->second .end (); ++matches_itr) {
530-
531- for (std::unordered_map<version_number, std::unordered_set<hash>>::const_iterator version_itr = matches_itr->second .begin ();
532- version_itr != matches_itr->second .end (); ++version_itr) {
533-
534- // Calculate the Percent Match:
535- // count the number of unique hashes for the percent match calculation
536- std::vector<std::pair<hash, location_in_submission>> submission_hashes = (*submission_itr)->getHashes ();
537- std::unordered_set<hash> unique_hashes;
538- for (std::vector<std::pair<hash, location_in_submission>>::const_iterator itr = submission_hashes.begin ();
539- itr != submission_hashes.end (); ++itr) {
540- unique_hashes.insert (itr->first );
541- }
542-
543- // the percent match is currently calculated using the number of hashes that match between this
544- // submission and the other submission, over the total number of hashes this submission has.
545- // In other words, the percentage is how much of this submission's code was plagiarised from the other.
546- unsigned int num_hashes_matched = version_itr->second .size ();
547- float percent = (100.0 * num_hashes_matched) / unique_hashes.size ();
548- student_ranking.push_back (StudentRanking (matches_itr->first , version_itr->first , gradeables_itr->first , Score (num_hashes_matched, percent)));
549- student_ranking.back ().score .calculateScore (num_hashes_matched);
550- }
551- }
552- }
553-
554- // =========================================================================
555- // Save this submission's highest percent match for later when we generate overall_rankings.txt
556- float percentMatch = (*submission_itr)->getPercentage ();
557- unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount ();
558- Score submission_score (totalMatchingHashes, percentMatch);
559- if (max_hashes_matched < totalMatchingHashes) {
560- max_hashes_matched = totalMatchingHashes;
561- }
562-
563- std::pair<version_number, Score> new_pair = {(*submission_itr)->version (), submission_score};
564- highest_matches[(*submission_itr)->student ()].push_back (new_pair);
565- // =========================================================================
566-
567- std::sort (student_ranking.begin (), student_ranking.end (), ranking_sorter);
568-
569- // create the directory and a file to write into
570- boost::filesystem::path ranking_student_dir = users_root_directory / (*submission_itr)->student () / std::to_string ((*submission_itr)->version ());
571- boost::filesystem::path ranking_student_file = ranking_student_dir / " ranking.txt" ;
572- boost::filesystem::create_directories (ranking_student_dir);
573- std::ofstream ranking_student_ostr (ranking_student_file.string ());
574-
575- // finally, write the file of ranking for this submission
576- for (unsigned int i = 0 ; i < student_ranking.size (); i++) {
577- ranking_student_ostr
578- << std::setw (15 ) << std::left << student_ranking[i].student << " "
579- << std::setw (3 ) << std::left << student_ranking[i].version << " "
580- << std::setw (1 ) << std::right << student_ranking[i].source_gradeable << " "
581- << std::setw (6 ) << std::setprecision (2 ) << std::fixed << student_ranking[i].score .getPercent () << " %" << std::endl;
582- }
583-
584493 // =========================================================================
585494 // Cleanup
586495
587- // Done with this submissions . discard the data and clear the memory
496+ // Done with this submission. Discard the data and clear the memory
588497 delete (*submission_itr);
589498 (*submission_itr) = nullptr ;
590499
591- // print current progress
500+ // Print current progress
592501 my_counter++;
593502 if (int ((my_counter / float (all_submissions.size ())) * 100 ) > my_percent) {
594503 int new_my_percent = int ((my_counter / float (all_submissions.size ())) * 100 );
@@ -607,7 +516,7 @@ int main(int argc, char* argv[]) {
607516
608517 time (&end);
609518 diff = difftime (end, start);
610- std::cout << " ]" << std::endl << " Finished processing submissions in " << diff << " seconds " << std::endl ;
519+ std::cout << " ]" << std::endl;
611520
612521 // Print out the list of users who had their matching positions array truncated
613522 if (matching_positions_truncations.size () > 0 ) {
@@ -618,40 +527,6 @@ int main(int argc, char* argv[]) {
618527 }
619528 std::cout << std::endl << " - Try increasing the hash size or adding a regex to fix this problem." << std::endl;
620529 }
621- fflush (stdout);
622-
623- // ===========================================================================
624- // Create a general summary of rankings of users by percentage match
625-
626- // create a single file of students ranked by highest percentage of code plagiarised
627- boost::filesystem::path ranking_file = lichen_gradeable_path / " overall_ranking.txt" ;
628- std::ofstream ranking_ostr (ranking_file.string ());
629-
630- // take the map of highest matches and convert it to a vector so we can sort it
631- // by percent match and then save it to a file
632- std::vector<StudentRanking> ranking;
633- for (std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>>::iterator itr
634- = highest_matches.begin (); itr != highest_matches.end (); ++itr) {
635-
636- std::pair<version_number, Score> best_score = itr->second .front ();
637- best_score.second .calculateScore (max_hashes_matched);
638- for (unsigned int i=0 ; i < itr->second .size (); i++) {
639- itr->second [i].second .calculateScore (max_hashes_matched);
640- if (itr->second [i].second > best_score.second ) {
641- best_score = itr->second [i];
642- }
643- }
644- ranking.push_back (StudentRanking (itr->first , best_score.first , " " , best_score.second ));
645- }
646-
647- std::sort (ranking.begin (), ranking.end (), ranking_sorter);
648- for (unsigned int i = 0 ; i < ranking.size (); i++) {
649- ranking_ostr
650- << std::left << std::setw (20 ) << ranking[i].student << " "
651- << std::setw (3 ) << ranking[i].version << " "
652- << std::right << std::setw (4 ) << std::setprecision (1 ) << std::fixed << ranking[i].score .getPercent () << " % "
653- << std::setw (5 ) << ranking[i].score .getHashesMatched () << std::endl;
654- }
655530
656531 // ===========================================================================
657532 // Done!
0 commit comments