@@ -141,8 +141,8 @@ public function go() {
141141 case $ this ->check_for_excessive_dropped_users ():
142142 // This check will block all upserts when an error is detected.
143143 exit (1 );
144- case $ this ->check_for_duplicate_user_ids ():
145- $ this -> log_it ( " Duplicate user IDs detected in CSV file. " );
144+ case $ this ->filter_duplicate_registrations ():
145+ // Never returns false. Error messages are already in log queue.
146146 break ;
147147 case $ this ->invalidate_courses ():
148148 // Should do nothing when $this->invalid_courses is empty
@@ -299,31 +299,31 @@ private function get_csv_data() {
299299 }
300300
301301 /**
302- * Users cannot be registered to the same course multiple times.
302+ * Students cannot be registered to the same course multiple times.
303303 *
304- * Any course with a user registered more than once is flagged invalid as
305- * it is indicative of data errors from the CSV file.
306- *
307- * @return bool always TRUE
304+ * If multiple registrations for the same student and course are found, the first instance is allowed to be
305+ * upserted to the database. All other instances are removed from the data set and therefore not upserted.
308306 */
309- private function check_for_duplicate_user_ids () {
310- foreach ($ this ->data as $ course => $ rows ) {
311- $ user_ids = null ;
312- $ d_rows = null ;
313- // Returns FALSE (as in there is an error) when duplicate IDs are found.
314- // However, a duplicate ID does not invalidate a course. Instead, the
315- // first enrollment is accepted, the other enrollments are discarded,
316- // and the event is logged.
317- if (validate::check_for_duplicate_user_ids ($ rows , $ user_ids , $ d_rows ) === false ) {
318- foreach ($ d_rows as $ user_id => $ userid_rows ) {
319- $ length = count ($ userid_rows );
320- for ($ i = 1 ; $ i < $ length ; $ i ++) {
321- unset($ this ->data [$ course ][$ userid_rows [$ i ]]);
322- }
307+ private function filter_duplicate_registrations (): true {
308+ foreach ($ this ->data as $ course => &$ rows ) {
309+ usort ($ rows , function ($ a , $ b ) { return $ a [COLUMN_USER_ID ] <=> $ b [COLUMN_USER_ID ]; });
310+ $ duplicated_ids = [];
311+ $ num_rows = count ($ rows );
312+
313+ // We are iterating from bottom to top through a course's data set. Should we find a duplicate registration
314+ // and unset it from the array, (1) we are unsetting duplicates starting from the bottom, (2) which preserves
315+ // the first entry among duplicate entries, and (3) we do not make a comparison with a null key.
316+ for ($ j = $ num_rows - 1 , $ i = $ j - 1 ; $ i >= 0 ; $ i --, $ j --) {
317+ if ($ rows [$ i ][COLUMN_USER_ID ] === $ rows [$ j ][COLUMN_USER_ID ]) {
318+ $ duplicated_ids [] = $ rows [$ j ][COLUMN_USER_ID ];
319+ unset($ rows [$ j ]);
323320 }
321+ }
324322
323+ if (count ($ duplicated_ids ) > 0 ) {
324+ array_unique ($ duplicated_ids , SORT_STRING );
325325 $ msg = "Duplicate user IDs detected in {$ course } data: " ;
326- $ msg .= implode (", " , $ user_ids );
326+ $ msg .= implode (", " , $ duplicated_ids );
327327 $ this ->log_it ($ msg );
328328 }
329329 }
0 commit comments