@@ -41,7 +41,7 @@ class submitty_student_auto_feed {
4141 public function __construct () {
4242
4343 //Important: Make sure we are running from CLI
44- if (PHP_SAPI != "cli " ) {
44+ if (PHP_SAPI !== "cli " ) {
4545 die ("This is a command line tool. " );
4646 }
4747
@@ -245,7 +245,12 @@ private function validate_csv($csv_data) {
245245 * be deduplicated.
246246 * ------------------------------------------------------------------ */
247247
248- deduplicate::deduplicate_data (self ::$ data ['users ' ], 'user_id ' );
248+ if ($ this ->deduplicate ('users ' , 'user_id ' ) === false ) {
249+
250+ //Deduplication didn't work. We can't proceed (set validation flag to false).
251+ $ this ->log_it ("Users data deduplication encountered a problem. Aborting. " );
252+ $ validation_flag = false ;
253+ }
249254
250255 //TRUE: Data validation passed and validated data set will have at least 1 row per table.
251256 //FALSE: Either data validation failed or at least one table is an empty set.
@@ -394,6 +399,38 @@ private function load_csv(&$csv_data) {
394399 return true ;
395400 }
396401
402+
/**
 * Deduplicate a data subset by a specific column.
 *
 * Rows of self::$data[$subset] are ordered by the value found in column
 * $key, and whenever several rows carry an identical value, only the one
 * that appeared last in the original data is retained.  The subset is
 * re-indexed afterwards, so it is left as a gap-free, zero-based list
 * sorted by $key.
 *
 * The users data must end up with a unique student per row.  Students in
 * multiple courses may have multiple entries, which is where deduplication
 * is necessary.
 *
 * @access private
 * @param  string  $subset  index of the data subset (within self::$data) to be deduplicated
 * @param  mixed   $key     column by which rows are deduplicated
 * @return boolean TRUE when deduplication is completed.  FALSE when the subset is missing/not an array, or when sorting fails.
 */
private function deduplicate($subset = 'users', $key = 'user_id') {

    //A missing or malformed subset cannot be sorted.  Report failure instead of handing it to usort().
    if (!isset(self::$data[$subset]) || !is_array(self::$data[$subset])) {
        return false;
    }

    //Work on a zero-based copy so that row positions can be used as sort tie-breakers.
    $rows  = array_values(self::$data[$subset]);
    $order = array_keys($rows);

    //Sort row positions by $key.  usort() is not stable before PHP 8.0, so ties are broken
    //by original position.  This makes the surviving duplicate (the last one) deterministic.
    $is_sorted = usort($order, function($a, $b) use ($rows, $key) {
        $cmp = strcmp($rows[$a][$key], $rows[$b][$key]);
        return ($cmp !== 0) ? $cmp : ($a - $b);
    });

    //Something went wrong during sort.  Abort and indicate failure.
    if ($is_sorted === false) {
        return false;
    }

    //Walk the sorted rows, skipping any row that is immediately followed by a row with an
    //identical key.  Only the last row of each run of duplicates is kept.
    $deduplicated = array();
    $count = count($order);
    for ($i = 0; $i < $count; $i++) {
        $row = $rows[$order[$i]];
        if ($i + 1 < $count && $rows[$order[$i + 1]][$key] === $row[$key]) {
            continue;
        }
        $deduplicated[] = $row;
    }

    //Appending to a fresh array leaves no holes in the numeric keys (unlike unset()).
    self::$data[$subset] = $deduplicated;

    //Indicate that deduplication is done.
    return true;
}
433+
397434 /**
398435 * "Update/Insert" data into the database. Code works via "batch" upserts.
399436 *
@@ -684,100 +721,6 @@ private function log_it($msg) {
684721 }
685722}
686723
/**
 * Static class for deduplicating data.
 *
 * Rows are stably sorted by one column and then, for every run of rows that
 * share an identical value in that column, all but the last row are dropped.
 */
class deduplicate {

    /**
     * Deduplicate data by a specific column.
     *
     * The users data must end up with a unique student per row.  Students
     * in multiple courses may have multiple entries, which is where
     * deduplication is necessary.  On return, $arr is sorted by $key,
     * contains one row per distinct $key value (the last such row of the
     * original order), and is re-indexed from zero.
     *
     * @access public
     * @param array $arr array to be deduplicated, passed by reference
     * @param mixed $key column by which rows are deduplicated
     */
    public static function deduplicate_data(&$arr, $key = 'user_id') {

        self::merge_sort($arr, $key);
        self::dedup($arr, $key);
    }

    /**
     * Compare two column values for sorting.
     *
     * Ordering is primarily case-insensitive.  Values that differ only by
     * letter case are then ordered case-sensitively, so that byte-identical
     * values always end up adjacent (which dedup() relies on).
     *
     * @access private
     * @param string $a left hand value
     * @param string $b right hand value
     * @return int negative, zero, or positive when $a sorts before, equal to, or after $b
     */
    private static function compare_keys($a, $b) {

        $cmp = strcasecmp($a, $b);
        return ($cmp !== 0) ? $cmp : strcmp($a, $b);
    }

    /**
     * Merge sort.
     *
     * A stable sort is used: rows that compare equal keep their original
     * relative order, so dedup() reliably retains the last of any
     * duplicated rows.
     *
     * @access private
     * @param array $arr array of data rows to be sorted, passed by reference
     * @param mixed $key column by which rows are sorted
     */
    private static function merge_sort(&$arr, $key) {

        //Arrays of size < 2 require no action.
        $count = count($arr);
        if ($count < 2) {
            return;
        }

        //Split the array in half.  The midpoint must be an integer: a float offset given
        //to array_slice() raises an implicit conversion deprecation as of PHP 8.1.
        $halfway = (int) floor($count / 2);
        $arr1 = array_slice($arr, 0, $halfway);
        $arr2 = array_slice($arr, $halfway);

        //Recurse to sort the two halves
        self::merge_sort($arr1, $key);
        self::merge_sort($arr2, $key);

        //If all of $arr1 is <= all of $arr2, just append them.
        if (self::compare_keys(end($arr1)[$key], $arr2[0][$key]) < 1) {
            $arr = array_merge($arr1, $arr2);
            return;
        }

        //Merge the two sorted arrays into a single sorted array.
        //On ties the row from $arr1 is taken first, which keeps the sort stable.
        $arr = array();
        $count1 = count($arr1);
        $count2 = count($arr2);
        $i = 0;
        $j = 0;
        while ($i < $count1 && $j < $count2) {
            if (self::compare_keys($arr1[$i][$key], $arr2[$j][$key]) < 1) {
                $arr[] = $arr1[$i];
                $i++;
            } else {
                $arr[] = $arr2[$j];
                $j++;
            }
        }

        //Merge the remainder
        for (/* no var init */; $i < $count1; $i++) {
            $arr[] = $arr1[$i];
        }

        for (/* no var init */; $j < $count2; $j++) {
            $arr[] = $arr2[$j];
        }
    }

    /**
     * Remove duplicated rows.
     *
     * Expects $arr to be sorted so that rows with identical $key values are
     * adjacent.  Of each run of identical values, only the last row is
     * kept.  The array is re-indexed afterwards so no key gaps remain.
     *
     * @access private
     * @param array $arr array of data rows to be deduplicated, passed by reference
     * @param mixed $key column by which rows are deduplicated
     */
    private static function dedup(&$arr, $key) {

        $count = count($arr);
        for ($i = 1; $i < $count; $i++) {
            if ($arr[$i][$key] === $arr[$i - 1][$key]) {
                unset($arr[$i - 1]);
            }
        }

        //unset() leaves holes in the numeric keys.  Restore a zero-based list.
        $arr = array_values($arr);
    }
}
781724
782725/** @static class to parse command line arguments */
783726class cli_args {
0 commit comments