2121// for a user.
class Submission {
public:
  // u: username of the submitter; v: integer version number of the submission.
  Submission(const std::string &u, int v) : username(u), version(v) {}

  std::string username;
  // Stored as an integer (rather than the directory-name string) so that
  // versions compare and sort numerically.
  int version;
};
2828
2929// to allow sorting
@@ -37,7 +37,7 @@ bool operator<(const Submission &a, const Submission &b) {
3737// the token) within in a specific concatenated file (the Submission).
3838class Sequence {
3939public:
40- Sequence (std::string u, std::string v , int p) : submission(u,v ),position(p) {}
40+ Sequence (std::string username, int version , int p) : submission(username,version ),position(p) {}
4141 Submission submission;
4242 int position;
4343};
@@ -47,7 +47,7 @@ class Sequence {
4747// helper typedefs
4848
4949
50- // common sequence hash -> ( each user -> all match locations by that user across all versions )
50+ // matching sequence hash -> ( each user -> all match locations by that user across all versions )
5151typedef std::map<std::string,std::map<std::string,std::vector<Sequence> > > hashed_sequences;
5252
5353
@@ -66,6 +66,53 @@ bool ranking_sorter(const std::pair<Submission,float> &a, const std::pair<Submis
6666}
6767
6868
69+ // ===================================================================================
70+ // ===================================================================================
71+ void insert_others (std::map<Submission,std::set<int > > &others,
72+ const std::map<Submission,std::vector<Sequence> > &matches) {
73+ for (std::map<Submission,std::vector<Sequence> >::const_iterator itr = matches.begin (); itr!=matches.end ();itr++) {
74+ // std::set<int> foo;
75+ for (int i = 0 ; i < itr->second .size (); i++) {
76+ others[itr->first ].insert (itr->second [i].position );
77+ }
78+ // .insert(std::make_pair(itr->first,foo));
79+ }
80+ }
81+
82+ void convert (std::map<Submission,std::set<int > > &myset, nlohmann::json &obj) {
83+ for (std::map<Submission,std::set<int > >::iterator itr = myset.begin (); itr != myset.end (); itr++) {
84+ nlohmann::json me;
85+ me[" username" ] = itr->first .username ;
86+ me[" version" ] = itr->first .version ;
87+
88+ std::vector<nlohmann::json> foo;
89+ int start = -1 ;
90+ int end = -1 ;
91+ std::set<int >::iterator itr2 = itr->second .begin ();
92+ while (true ) {
93+ int pos = (itr2 == itr->second .end ()) ? -1 : *itr2;
94+ if (pos != -1 && start == -1 ) {
95+ start = end = pos;
96+ } else if (pos != -1 && end+1 == pos) {
97+ end = pos;
98+ } else if (start != -1 ) {
99+ nlohmann::json range;
100+ range[" start" ] = start;
101+ range[" end" ] = end;
102+ start=end=-1 ;
103+ foo.push_back (range);
104+ }
105+ if (itr2 == itr->second .end ()) {
106+ break ;
107+ }
108+ itr2++;
109+ }
110+
111+ me[" matchingpositions" ] = foo;
112+ obj.push_back (me);
113+ }
114+ }
115+
69116// ===================================================================================
70117// ===================================================================================
71118int main (int argc, char * argv[]) {
@@ -76,11 +123,13 @@ int main(int argc, char* argv[]) {
76123
77124 // ---------------------------------------------------------------------------
78125 // deal with command line arguments
79- assert (argc == 4 );
126+ assert (argc == 6 );
80127 std::string semester = argv[1 ];
81128 std::string course = argv[2 ];
82129 std::string gradeable = argv[3 ];
83-
130+ assert (argv[4 ] == std::string (" --window" ));
131+ int window = std::stoi (std::string (argv[5 ]));
132+ assert (window >= 1 );
84133
85134 // error checking, confirm there are hashes to work with
86135 std::string tmp = " /var/local/submitty/courses/" +semester+" /" +course+" /lichen/hashes/" +gradeable;
@@ -112,7 +161,9 @@ int main(int argc, char* argv[]) {
112161 for (boost::filesystem::directory_iterator username_itr ( username_path ); username_itr != end_iter; ++username_itr) {
113162 boost::filesystem::path version_path = username_itr->path ();
114163 assert (is_directory (version_path));
115- std::string version = username_itr->path ().filename ().string ();
164+ std::string str_version = username_itr->path ().filename ().string ();
165+ int version = std::stoi (str_version);
166+ assert (version > 0 );
116167 // load the hashes sequences from this submission
117168 boost::filesystem::path hash_file = version_path;
118169 hash_file /= " hashes.txt" ;
@@ -131,7 +182,7 @@ int main(int argc, char* argv[]) {
131182
132183 // label the parts of the file that are common to many
133184 // user,version -> vector<position>
134- std::map<Submission,std::vector <int > > common;
185+ std::map<Submission,std::set <int > > common;
135186
136187 // label the parts of the file that match the provided code
137188 // user,version -> vector<position>
@@ -152,7 +203,7 @@ int main(int argc, char* argv[]) {
152203 // common to many/all
153204 for (std::map<std::string,std::vector<Sequence> >::iterator itr2 = itr->second .begin (); itr2 != itr->second .end (); itr2++) {
154205 for (int i = 0 ; i < itr2->second .size (); i++) {
155- common[itr2->second [i].submission ].push_back (itr2->second [i].position );
206+ common[itr2->second [i].submission ].insert (itr2->second [i].position );
156207 }
157208 }
158209 } else if (count > 1 && count < 20 ) {
@@ -161,15 +212,15 @@ int main(int argc, char* argv[]) {
161212 std::string username = itr2->first ;
162213 for (int i = 0 ; i < itr2->second .size (); i++) {
163214 assert (itr2->second [i].submission .username == username);
164- std::string version = itr2->second [i].submission .version ;
215+ int version = itr2->second [i].submission .version ;
165216 int position = itr2->second [i].position ;
166217
167218 std::map<Submission, std::vector<Sequence> > matches;
168219
169220 for (std::map<std::string,std::vector<Sequence> >::iterator itr3 = itr->second .begin (); itr3 != itr->second .end (); itr3++) {
170221 std::string match_username = itr3->first ;
171222 for (int j = 0 ; j < itr3->second .size (); j++) {
172- std::string match_version = itr3->second [j].submission .version ;
223+ int match_version = itr3->second [j].submission .version ;
173224 Submission ms (match_username,match_version);
174225 matches[ms].push_back (itr3->second [j]);
175226 }
@@ -185,6 +236,7 @@ int main(int argc, char* argv[]) {
185236 // ---------------------------------------------------------------------------
186237 // prepare a sorted list of all users sorted by match percent
187238 std::vector<std::pair<Submission,float > > ranking;
239+
188240 for (std::map<Submission,std::map<int ,std::map<Submission,std::vector<Sequence> > > >::iterator itr = suspicious.begin ();
189241 itr != suspicious.end (); itr++) {
190242 int total = submission_length[itr->first ];
@@ -194,58 +246,99 @@ int main(int argc, char* argv[]) {
194246 std::vector<nlohmann::json> info;
195247
196248 std::string username = itr->first .username ;
197- std::string version = itr->first .version ;
249+ int version = itr->first .version ;
198250
199251 ranking.push_back (std::make_pair (itr->first ,percent));
200252
201253 // prepare the ranges of suspicious matching tokens
202254 int range_start=-1 ;
203255 int range_end=-1 ;
204- for (std::map<int ,std::map<Submission,std::vector<Sequence> > >::iterator itr2 = itr->second .begin (); itr2 != itr->second .end (); itr2++) {
205- int pos = itr2->first ;
206- if (range_start==-1 ) {
256+ std::map<Submission, std::set<int > > others;
257+ std::map<int ,std::map<Submission,std::vector<Sequence> > >::iterator itr2 = itr->second .begin ();
258+ while (true ) {
259+ int pos = (itr2 == itr->second .end ()) ? -1 : itr2->first ;
260+ if (pos != -1 && range_start==-1 ) {
207261 range_start = range_end = pos;
208- } else if (range_end+1 == pos) {
262+ insert_others (others,itr2->second );
263+ } else if (pos != -1 && range_end+1 == pos) {
209264 range_end = pos;
210- } else {
211- std::map<std::string,std::string> info_data;
212- info_data[" start" ]=std::to_string (range_start);
213- info_data[" end" ]=std::to_string (range_end);
214- info_data[" type" ]=std::string (" match" );
265+ insert_others (others,itr2->second );
266+ } else if (range_start != -1 ) {
267+ std::map<std::string,nlohmann::json> info_data;
268+ info_data[" start" ]=nlohmann::json (range_start);
269+ info_data[" end" ]=nlohmann::json (range_end);
270+ info_data[" type" ]=nlohmann::json (std::string (" match" ));
271+ nlohmann::json obj;
272+ convert (others,obj);
273+ info_data[" others" ]=obj;
215274 info.push_back (info_data);
216275 range_start=range_end=-1 ;
276+ others.clear ();
277+ }
278+ if (itr2 == itr->second .end ()) {
279+ break ;
217280 }
281+ itr2++;
218282 }
219- if (range_start != -1 ) {
220- std::map<std::string,std::string> info_data;
221- info_data[" start" ]=std::to_string (range_start);
222- info_data[" end" ]=std::to_string (range_end);
223- info_data[" type" ]=std::string (" match" );
224- info.push_back (info_data);
225- range_start=range_end=-1 ;
283+
284+ std::map<Submission,std::set<int > >::iterator itr3 = common.find (itr->first );
285+ if (itr3 != common.end ()) {
286+ // std::cout << "HAS COMMON CODE" << std::endl;
287+ int range_start=-1 ;
288+ int range_end=-1 ;
289+ for (std::set<int >::iterator itr4 = itr3->second .begin (); itr4 != itr3->second .end (); itr4++) {
290+ // std::cout << "v=" << *itr4 << std::endl;
291+ if (range_start == -1 ) {
292+ range_start = range_end = *itr4;
293+ } else if (range_end+1 == *itr4) {
294+ range_end = *itr4;
295+ } else {
296+ std::map<std::string,nlohmann::json> info_data;
297+ info_data[" start" ]=nlohmann::json (range_start);
298+ info_data[" end" ]=nlohmann::json (range_end);
299+ info_data[" type" ]=std::string (" common" );
300+ info.push_back (info_data);
301+ range_start = range_end = -1 ;
302+ }
303+ }
304+ if (range_start != -1 ) {
305+ std::map<std::string,nlohmann::json> info_data;
306+ info_data[" start" ]=nlohmann::json (range_start);
307+ info_data[" end" ]=nlohmann::json (range_end);
308+ info_data[" type" ]=std::string (" common" );
309+ info.push_back (info_data);
310+ range_start=range_end=-1 ;
311+ }
226312 }
227313
228314 // save the file with matches per user
229315 nlohmann::json match_data = info;
230- std::string matches_dir = " /var/local/submitty/courses/" +semester+" /" +course+" /lichen/matches/" +gradeable+" /" +username+" /" +version;
316+ std::string matches_dir = " /var/local/submitty/courses/" +semester+" /" +course+" /lichen/matches/" +gradeable+" /" +username+" /" +std::to_string ( version) ;
231317 boost::filesystem::create_directories (matches_dir);
232318 std::string matches_file = matches_dir+" /matches.json" ;
233319 std::ofstream ostr (matches_file);
234320 assert (ostr.good ());
235321 ostr << match_data.dump (4 ) << std::endl;
236322 }
237323
324+ std::set<std::string> users_already_ranked;
325+
238326 // save the rankings to a file
239327 std::string ranking_dir = " /var/local/submitty/courses/" +semester+" /" +course+" /lichen/ranking/" ;
240328 std::string ranking_file = ranking_dir+gradeable+" .txt" ;
241329 boost::filesystem::create_directories (ranking_dir);
242330 std::ofstream ranking_ostr (ranking_file);
243331 std::sort (ranking.begin (),ranking.end (),ranking_sorter);
244332 for (int i = 0 ; i < ranking.size (); i++) {
245- ranking_ostr
246- << std::setw (6 ) << std::setprecision (2 ) << std::fixed << 100.0 *ranking[i].second << " % "
247- << std::setw (15 ) << std::left << ranking[i].first .username << " "
248- << std::setw (3 ) << std::right << ranking[i].first .version << std::endl;
333+ std::string username = ranking[i].first .username ;
334+ if (users_already_ranked.insert (username).second != false ) {
335+ // print each username at most once, only if insert was
336+ // successful (not already in the set)
337+ ranking_ostr
338+ << std::setw (6 ) << std::setprecision (2 ) << std::fixed << 100.0 *ranking[i].second << " % "
339+ << std::setw (15 ) << std::left << ranking[i].first .username << " "
340+ << std::setw (3 ) << std::right << ranking[i].first .version << std::endl;
341+ }
249342 }
250343
251344
0 commit comments