55 *
66 * This script will read a student enrollment CSV feed provided by the campus
77 * registrar or data warehouse and "upsert" (insert/update) the feed into
8- * Submitty's course databases. Requires PHP 7.3 and pgsql extension.
8+ * Submitty's course databases. Requires pgsql extension.
99 *
1010 * @author Peter Bailie, Rensselaer Polytechnic Institute
1111 */
1515require __DIR__ . "/ssaf_cli.php " ;
1616require __DIR__ . "/ssaf_db.php " ;
1717require __DIR__ . "/ssaf_validate.php " ;
18+ require __DIR__ . "/ssaf_rcos.php " ;
1819
1920// Important: Make sure we are running from CLI
2021if (php_sapi_name () !== "cli " ) {
2728
2829/** primary process class */
2930class submitty_student_auto_feed {
30- /** @var resource File handle to read CSV */
31+ /** File handle to read CSV */
3132 private $ fh ;
32- /** @var string Semester code */
33- private $ semester ;
34- /** @var array List of courses registered in Submitty */
35- private $ course_list ;
36- /** @var array Describes how courses are mapped from one to another */
37- private $ mapped_courses ;
38- /** @var array Describes courses/sections that are duplicated to other courses/sections */
39- private $ crn_copymap ;
40- /** @var array Courses with invalid data. */
41- private $ invalid_courses ;
42- /** @var array All CSV data to be upserted */
43- private $ data ;
44- /** @var string Ongoing string of messages to write to logfile */
45- private $ log_msg_queue ;
33+ /** Semester code */
34+ private string $ semester ;
35+ /** List of courses registered in Submitty */
36+ private array $ course_list ;
37+ /** Describes how courses are mapped from one to another */
38+ private array $ mapped_courses ;
39+ /** Describes courses/sections that are duplicated to other courses/sections */
40+ private array $ crn_copymap ;
41+ /** Courses with invalid data. */
42+ private array $ invalid_courses ;
43+ /** All CSV data to be upserted */
44+ private array $ data ;
45+ /** Ongoing string of messages to write to logfile */
46+ private string $ log_msg_queue ;
47+ /** For special cases involving Rensselaer Center for Open Source */
48+ private object $ rcos ;
4649
4750 /** Init properties. Open DB connection. Open CSV file. */
4851 public function __construct () {
@@ -100,6 +103,9 @@ public function __construct() {
100103 // Get CRN shared courses/sections (when a course/section is copied to another course/section)
101104 $ this ->crn_copymap = $ this ->read_crn_copymap ();
102105
106+ // Helper object for special-cases involving RCOS.
107+ $ this ->rcos = new rcos ();
108+
103109 // Init other properties.
104110 $ this ->invalid_courses = [];
105111 $ this ->data = [];
@@ -135,8 +141,8 @@ public function go() {
135141 case $ this ->check_for_excessive_dropped_users ():
136142 // This check will block all upserts when an error is detected.
137143 exit (1 );
138- case $ this ->check_for_duplicate_user_ids ():
139- $ this -> log_it ( " Duplicate user IDs detected in CSV file. " );
144+ case $ this ->filter_duplicate_registrations ():
145+ // Never returns false. Error messages are already in log queue.
140146 break ;
141147 case $ this ->invalidate_courses ():
142148 // Should do nothing when $this->invalid_courses is empty
@@ -185,15 +191,15 @@ private function get_csv_data() {
185191 // Read and assign csv rows into $this->data array
186192 $ row = fgetcsv ($ this ->fh , 0 , CSV_DELIM_CHAR );
187193 while (!feof ($ this ->fh )) {
188- // Course is comprised of an alphabetic prefix and a numeric suffix.
189- $ course = strtolower ($ row [COLUMN_COURSE_PREFIX ] . $ row [COLUMN_COURSE_NUMBER ]);
190-
191194 // Trim whitespace from all fields in $row.
192195 array_walk ($ row , function (&$ val , $ key ) { $ val = trim ($ val ); });
193196
194197 // Remove any leading zeroes from "integer" registration sections.
195198 if (ctype_digit ($ row [COLUMN_SECTION ])) $ row [COLUMN_SECTION ] = ltrim ($ row [COLUMN_SECTION ], "0 " );
196199
200+ // Course is comprised of an alphabetic prefix and a numeric suffix.
201+ $ course = strtolower ($ row [COLUMN_COURSE_PREFIX ] . $ row [COLUMN_COURSE_NUMBER ]);
202+
197203 switch (true ) {
198204 // Check that $row has an appropriate student registration.
199205 case array_search ($ row [COLUMN_REGISTRATION ], $ all_valid_reg_codes ) === false :
@@ -212,6 +218,9 @@ private function get_csv_data() {
212218 // Check that $row is associated with the course list.
213219 case array_search ($ course , $ this ->course_list ) !== false :
214220 if (validate::validate_row ($ row , $ row_num )) {
221+ // Check (and perform) special-case RCOS registration section mapping.
222+ $ this ->rcos ->map ($ course , $ row );
223+
215224 // Include $row
216225 $ this ->data [$ course ][] = $ row ;
217226
@@ -233,8 +242,13 @@ private function get_csv_data() {
233242 if (array_key_exists ($ section , $ this ->mapped_courses [$ course ])) {
234243 $ m_course = $ this ->mapped_courses [$ course ][$ section ]['mapped_course ' ];
235244 if (validate::validate_row ($ row , $ row_num )) {
236- // Include $row .
245+ // Do course mapping (alters registration section) .
237246 $ row [COLUMN_SECTION ] = $ this ->mapped_courses [$ course ][$ section ]['mapped_section ' ];
247+
248+ // Check (and override) for special-case RCOS registration section mapping.
249+ $ this ->rcos ->map ($ course , $ row );
250+
251+ // Include $row.
238252 $ this ->data [$ m_course ][] = $ row ;
239253
240254 // $row with a blank email is allowed, but it is also logged.
@@ -285,31 +299,31 @@ private function get_csv_data() {
285299 }
286300
287301 /**
288- * Users cannot be registered to the same course multiple times.
302+ * Students cannot be registered to the same course multiple times.
289303 *
290- * Any course with a user registered more than once is flagged invalid as
291- * it is indicative of data errors from the CSV file.
292- *
293- * @return bool always TRUE
304+ * If multiple registrations for the same student and course are found, the first instance is allowed to be
305+ * upserted to the database. All other instances are removed from the data set and therefore not upserted.
294306 */
295- private function check_for_duplicate_user_ids () {
296- foreach ($ this ->data as $ course => $ rows ) {
297- $ user_ids = null ;
298- $ d_rows = null ;
299- // Returns FALSE (as in there is an error) when duplicate IDs are found.
300- // However, a duplicate ID does not invalidate a course. Instead, the
301- // first enrollment is accepted, the other enrollments are discarded,
302- // and the event is logged.
303- if (validate::check_for_duplicate_user_ids ($ rows , $ user_ids , $ d_rows ) === false ) {
304- foreach ($ d_rows as $ user_id => $ userid_rows ) {
305- $ length = count ($ userid_rows );
306- for ($ i = 1 ; $ i < $ length ; $ i ++) {
307- unset($ this ->data [$ course ][$ userid_rows [$ i ]]);
308- }
307+ private function filter_duplicate_registrations (): true {
308+ foreach ($ this ->data as $ course => &$ rows ) {
309+ usort ($ rows , function ($ a , $ b ) { return $ a [COLUMN_USER_ID ] <=> $ b [COLUMN_USER_ID ]; });
310+ $ duplicated_ids = [];
311+ $ num_rows = count ($ rows );
312+
313+ // We are iterating from bottom to top through a course's data set. Should we find a duplicate registration
314+ // and unset it from the array, (1) we are unsetting duplicates starting from the bottom, (2) which preserves
315+ // the first entry among duplicate entries, and (3) we do not make a comparison with a null key.
316+ for ($ j = $ num_rows - 1 , $ i = $ j - 1 ; $ i >= 0 ; $ i --, $ j --) {
317+ if ($ rows [$ i ][COLUMN_USER_ID ] === $ rows [$ j ][COLUMN_USER_ID ]) {
318+ $ duplicated_ids [] = $ rows [$ j ][COLUMN_USER_ID ];
319+ unset($ rows [$ j ]);
309320 }
321+ }
310322
323+ if (count ($ duplicated_ids ) > 0 ) {
324+ array_unique ($ duplicated_ids , SORT_STRING );
311325 $ msg = "Duplicate user IDs detected in {$ course } data: " ;
312- $ msg .= implode (", " , $ user_ids );
326+ $ msg .= implode (", " , $ duplicated_ids );
313327 $ this ->log_it ($ msg );
314328 }
315329 }
0 commit comments