Skip to content

Commit c832542

Browse files
pbailie authored and bmcutler committed
Auto feed update (#7)
* Auto Feed Update student_auto_feed/submitty_student_auto_feed.php Refactor/Simplify/Improve deduplication code. Perhaps we don't need a stable sort in this particular project. * auto_feed_update Deduplication now relies on usort() instead of merge sort. Should be faster.
1 parent 33af541 commit c832542

File tree

1 file changed

+39
-96
lines changed

1 file changed

+39
-96
lines changed

student_auto_feed/submitty_student_auto_feed.php

Lines changed: 39 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class submitty_student_auto_feed {
4141
public function __construct() {
4242

4343
//Important: Make sure we are running from CLI
44-
if (PHP_SAPI != "cli") {
44+
if (PHP_SAPI !== "cli") {
4545
die("This is a command line tool.");
4646
}
4747

@@ -245,7 +245,12 @@ private function validate_csv($csv_data) {
245245
* be deduplicated.
246246
* ------------------------------------------------------------------ */
247247

248-
deduplicate::deduplicate_data(self::$data['users'], 'user_id');
248+
if ($this->deduplicate('users', 'user_id') === false) {
249+
250+
//Deduplication didn't work. We can't proceed (set validation flag to false).
251+
$this->log_it("Users data deduplication encountered a problem. Aborting.");
252+
$validation_flag = false;
253+
}
249254

250255
//TRUE: Data validation passed and validated data set will have at least 1 row per table.
251256
//FALSE: Either data validation failed or at least one table is an empty set.
@@ -394,6 +399,38 @@ private function load_csv(&$csv_data) {
394399
return true;
395400
}
396401

402+
403+
/**
 * Deduplicate a data subset by a specific column.
 *
 * The users table in the "Submitty" database must have a unique student
 * per row. Students in multiple courses may have multiple entries, so
 * deduplication is necessary.
 *
 * Rows are sorted by $key (byte-wise, via strcmp()) so that rows sharing a
 * key become adjacent. Within each run of equal keys only the last row of
 * the sorted order is kept. The surviving rows are re-indexed 0..n-1 so the
 * subset remains a plain list.
 *
 * @access private
 * @param string $subset name of the self::$data subset to be deduplicated
 * @param mixed $key column by which rows are deduplicated
 * @return boolean TRUE when deduplication is completed. FALSE when the subset
 *                 is missing / not an array, or when sorting fails.
 */
private function deduplicate($subset = 'users', $key = 'user_id') {

    //A missing or malformed subset cannot be sorted. Report failure so the
    //caller can abort instead of passing a non-array to usort().
    if (!isset(self::$data[$subset]) || !is_array(self::$data[$subset])) {
        return false;
    }

    //First, sort data subset so duplicate rows (by $key) sit next to each other.
    $is_sorted = usort(self::$data[$subset], function($a, $b) use ($key) {
        return strcmp($a[$key], $b[$key]);
    });

    //Something went wrong during sort. Abort and indicate failure.
    //(usort() could return false before PHP 8.0; it always returns true since.)
    if (!$is_sorted) {
        return false;
    }

    //Remove the earlier row of every adjacent pair that shares the same $key.
    //$count is taken once, before any unset(), so indices match the sorted list.
    $count = count(self::$data[$subset]);
    for ($i = 1; $i < $count; $i++) {
        if (self::$data[$subset][$i][$key] === self::$data[$subset][$i-1][$key]) {
            unset(self::$data[$subset][$i-1]);
        }
    }

    //unset() leaves holes in the integer keys; close them.
    self::$data[$subset] = array_values(self::$data[$subset]);

    //Indicate that deduplication is done.
    return true;
}
433+
397434
/**
398435
* "Update/Insert" data into the database. Code works via "batch" upserts.
399436
*
@@ -684,100 +721,6 @@ private function log_it($msg) {
684721
}
685722
}
686723

687-
/** static class for deduplicating data */
class deduplicate {

    /**
     * deduplicate data by a specific column
     *
     * Users table in "Submitty" database must have a unique student per row.
     * Students in multiple courses may have multiple entries, so
     * deduplication is necessary.
     *
     * Rows are sorted by $key and, for every group of rows sharing the same
     * key, only the row that appeared last in the input is kept. The result
     * is re-indexed 0..n-1.
     *
     * @access public
     * @param array $arr array to be deduplicated, passed by reference
     * @param mixed $key column by which rows are deduplicated
     */
    public static function deduplicate_data(&$arr, $key='user_id') {

        self::merge_sort($arr, $key);
        self::dedup($arr, $key);
    }

    /**
     * merge sort
     *
     * A stable sort is used so that rows with equal keys keep their input
     * order (PHP's built in sorts only became stable in PHP 8.0). Keys are
     * compared byte-wise with strcmp() so that the ordering agrees with the
     * strict (===) comparison used by dedup(). A case-insensitive sort could
     * place a case variant between two identical keys and hide the duplicate.
     *
     * @access private
     * @param array $arr array of data rows to be sorted
     * @param mixed $key column by which rows are sorted
     */
    private static function merge_sort(&$arr, $key) {

        //Arrays of size < 2 require no action.
        $count = count($arr);
        if ($count < 2) {
            return;
        }

        //Split the array in half (integer offset, also for odd sizes).
        $halfway = (int) ($count / 2);
        $arr1 = array_slice($arr, 0, $halfway);
        $arr2 = array_slice($arr, $halfway);

        //Recurse to sort the two halves
        self::merge_sort($arr1, $key);
        self::merge_sort($arr2, $key);

        //If all of $arr1 is <= all of $arr2, just append them.
        if (strcmp(end($arr1)[$key], $arr2[0][$key]) <= 0) {
            $arr = array_merge($arr1, $arr2);
            return;
        }

        //Merge the two sorted arrays into a single sorted array.
        //On ties the row from $arr1 goes first, which keeps the sort stable.
        $count1 = count($arr1);
        $count2 = count($arr2);
        $arr = array();
        $i = 0;
        $j = 0;
        while ($i < $count1 && $j < $count2) {
            if (strcmp($arr1[$i][$key], $arr2[$j][$key]) <= 0) {
                $arr[] = $arr1[$i];
                $i++;
            } else {
                $arr[] = $arr2[$j];
                $j++;
            }
        }

        //Merge the remainder (at most one of these loops has work to do)
        for (/* no var init */; $i < $count1; $i++) {
            $arr[] = $arr1[$i];
        }

        for (/* no var init */; $j < $count2; $j++) {
            $arr[] = $arr2[$j];
        }
    }

    /**
     * remove duplicated student rows
     *
     * Expects $arr to be sorted by $key already. For each adjacent pair with
     * an identical key the earlier row is removed, so the last row of every
     * run survives. Keys are re-indexed afterwards to close the holes left
     * by unset().
     *
     * @access private
     * @param array $arr array of data rows to be deduplicated
     * @param mixed $key column by which rows are deduplicated
     */
    private static function dedup(&$arr, $key) {

        $count = count($arr);
        for ($i = 1; $i < $count; $i++) {
            if ($arr[$i][$key] === $arr[$i-1][$key]) {
                unset($arr[$i-1]);
            }
        }

        $arr = array_values($arr);
    }
}
781724

782725
/** @static class to parse command line arguments */
783726
class cli_args {

0 commit comments

Comments
 (0)