From 9c349e8236d0ce5d12b383ff0f3638c4b02e26dd Mon Sep 17 00:00:00 2001 From: freaky4wrld Date: Wed, 13 Mar 2024 10:49:54 +0530 Subject: [PATCH 01/12] feat: added scripts to backend folder with json data --- backend/scripts/emailCleaner.mjs | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 backend/scripts/emailCleaner.mjs diff --git a/backend/scripts/emailCleaner.mjs b/backend/scripts/emailCleaner.mjs new file mode 100644 index 000000000..609572262 --- /dev/null +++ b/backend/scripts/emailCleaner.mjs @@ -0,0 +1,33 @@ +import jsonData from './test-data.json' assert {type: 'json'}; + +const emailMap = {}; + +jsonData.forEach(user => { + const lowercaseEmail = user.email.toLowerCase(); + if (!emailMap[lowercaseEmail]) { + emailMap[lowercaseEmail] = user; + } else { + const existingUser = emailMap[lowercaseEmail]; + existingUser.skillsToMatch.push(...user.skillsToMatch); + existingUser.projects.push(...user.projects); + existingUser.managedProjects.push(...user.managedProjects); + existingUser.textingOk = existingUser.textingOk || user.textingOk; + existingUser.isActive = existingUser.isActive || user.isActive; + existingUser.newMember = existingUser.newMember || user.newMember; + existingUser.currentRole = existingUser !== user ? existingUser.currentRole : user.currentRole; + existingUser.desiredRole = existingUser !== user ? 
existingUser.desiredRole : user.desiredRole; + + if (existingUser.accessLevel === 'admin' || user.accessLevel === 'admin') { + existingUser.accessLevel = 'admin'; + } + // Preserving the older createdDate, firstAttended and modifying the email + if (new Date(user.createdDate) < new Date(existingUser.createdDate)) { + existingUser.createdDate = user.createdDate; + existingUser.firstAttended = user.firstAttended; + existingUser.email = `${user.email.toLowerCase()}_${user._id}` + } + } + // Always lowercase email + user.email = lowercaseEmail; +}); + From fd33817844d8d9b9e869a437509e0dccd8b843af Mon Sep 17 00:00:00 2001 From: freaky4wrld Date: Sat, 11 May 2024 09:04:57 +0530 Subject: [PATCH 02/12] feat: added test-data.json file --- backend/scripts/test-data.json | 1116 ++++++++++++++++++++++++++++++++ 1 file changed, 1116 insertions(+) create mode 100644 backend/scripts/test-data.json diff --git a/backend/scripts/test-data.json b/backend/scripts/test-data.json new file mode 100644 index 000000000..5519d4d72 --- /dev/null +++ b/backend/scripts/test-data.json @@ -0,0 +1,1116 @@ +[ + { + "name": { + "firstName": "John", + "lastName": "Doe" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": ["60d13ff818317127044e7f08"], + "isActive": true, + "_id": "5f1d23fe316d2f00345ef06a", + "email": "johnDoe@civic.org", + "currentRole": "most common human name", + "desiredRole": "most desired human name", + "newMember": false, + "firstAttended": "NOV 2015", + "createdDate": "2020-01-14T02:14:22.407Z", + "__v": 0, + "attendanceReason": "Civic Engagement", + "currentProject": "Undebate" + }, + { + "name": { + "firstName": "Iggy", + "lastName": "Stoic" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1d2490316d2f00172ef072", + "email": "iggystoic@gmail.com", + "currentRole": "Tech Consulting", + "desiredRole": "Technical 
Product", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-14T02:16:48.597Z", + "__v": 0 + }, + { + "name": { + "firstName": "TEST", + "lastName": "PERSON" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1e76c20ab172001790f806", + "email": "TEST@GMAIL.com", + "currentRole": "Test Developer", + "desiredRole": "Senior Test Developer", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-15T02:19:46.780Z", + "__v": 0 + }, + { + "name": { + "firstName": "Test", + "lastName": "Person" + }, + "accessLevel": "admin", + "skillsToMatch": ["Testing", "Quality Management"], + "projects": [], + "textingOk": false, + "managedProjects": ["60d13ff818317127044e7f09"], + "isActive": true, + "_id": "5f4bfbc8e9f4f121e8c1eb42", + "email": "test@gmail.com", + "currentRole": "Test Student", + "desiredRole": "Software Developer", + "newMember": false, + "attendanceReason": "Environment", + "currentProject": "VRMS", + "firstAttended": "JAN 2019", + "createdDate": "2024-05-10T03:37:30.363Z" + }, + { + "name": { + "firstName": "John", + "lastName": "Atkins" + }, + "accessLevel": "user", + "skillsToMatch": ["acting"], + "projects": ["Mr. Bean"], + "textingOk": true, + "managedProjects": [], + "isActive": true, + "_id": "5e1e74030ab172001790f7ea", + "email": "JOHN.J.ATKINS@GMAIL.COM", + "currentRole": "Mr. 
Bean", + "desiredRole": "Johnny English", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-15T02:08:03.024Z", + "__v": 0 + }, + { + "name": { + "firstName": "Alex", + "lastName": "Chu" + }, + "accessLevel": "admin", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [ + "5edeac78ce228b001778facd", + "60d13ff818317127044e7f08" + ], + "isActive": true, + "_id": "5f164d2839cb9c001736f4cf", + "email": "chu.alex@gmail.com", + "currentRole": "Software engineer", + "desiredRole": "Software engineer", + "newMember": false, + "firstAttended": "JUL 2020", + "createdDate": "2020-07-21T02:04:24.241Z", + "__v": 0 + }, + { + "name": { + "firstName": "Boston", + "lastName": "Langford" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1d27d9316d2f00172ef090", + "email": "boston@snl.com", + "currentRole": "PM", + "desiredRole": "PM", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-14T02:30:49.449Z", + "__v": 0 + }, + { + "name": { + "firstName": "Cole", + "lastName": "Bennett" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1d2323316d2f00172ef064", + "email": "bennett.cole@gmail.com", + "currentRole": "idk", + "desiredRole": "idk", + "newMember": true, + "firstAttended": "APR 2020", + "createdDate": "2020-01-14T02:10:43.978Z", + "__v": 0 + }, + { + "name": { + "firstName": "Cole", + "lastName": "Bennett" + }, + "accessLevel": "admin", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1d2081316d2f00172ef052", + "email": "BENNETT.COLE@GMAIL.COM", + "currentRole": "Dir of MUSIC", + "desiredRole": "None", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-14T01:59:29.273Z", + "__v": 0, + 
"attendanceReason": "Social Justice/Equity" + }, + { + "name": { + "firstName": "John", + "lastName": "Atkins" + }, + "accessLevel": "user", + "skillsToMatch": ["comic"], + "projects": ["Johnny English"], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1d2137316d2f00172ef056", + "email": "john.j.atkins@gmail.com", + "currentRole": "High School Student", + "desiredRole": "Software/Research", + "newMember": false, + "firstAttended": "DEC 2019", + "createdDate": "2020-01-14T02:02:31.455Z", + "__v": 0, + "attendanceReason": "Open Data", + "currentProject": "New Schools Today" + }, + { + "name": { + "firstName": "Greg", + "lastName": "Smith" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5eec3e16411acc00174869a3", + "email": "gregpawpetrol@gmail.com", + "currentRole": "Data Consultant", + "desiredRole": "Data Scientist", + "newMember": false, + "firstAttended": "NOV 2019", + "createdDate": "2020-06-19T04:24:54.887Z", + "__v": 0 + }, + { + "name": { + "firstName": "Ryan", + "lastName": "Gosling" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e30e8a80b9d2300177d3b20", + "email": "rgosling@gmail.com", + "currentRole": "Data Scientist", + "desiredRole": "Data Scientist", + "newMember": false, + "firstAttended": "OCT 2019", + "createdDate": "2020-01-29T02:06:32.192Z", + "__v": 0, + "attendanceReason": "Open Data", + "currentProject": "311 Data" + }, + { + "name": { + "firstName": "Jared", + "lastName": "Maxwell" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e38d3cf8d52770017ae8a91", + "email": "jmax812@max.com", + "currentRole": "freelance dev", + "desiredRole": "employed def", + "newMember": false, + "firstAttended": "JAN 2020", + 
"createdDate": "2020-02-04T02:15:43.745Z", + "__v": 0, + "attendanceReason": "Civic Engagement", + "currentProject": "VRMS" + }, + { + "name": { + "firstName": "Dexter", + "lastName": "Robinson" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5f28c25206f21000177e690a", + "email": "Dexterlab@gmail.com", + "currentRole": "Software Developer", + "desiredRole": "Frontend Developer", + "newMember": true, + "firstAttended": "AUG 2020", + "createdDate": "2020-08-04T02:05:06.309Z", + "__v": 0 + }, + { + "name": { + "firstName": "Sharon", + "lastName": "Wesley" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e4ca746b73a2a001732f49a", + "email": "sharonwesleycodes@gmail.com", + "currentRole": "Student", + "desiredRole": "Programmer", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-19T03:11:02.863Z", + "__v": 0 + }, + { + "name": { + "firstName": "Mr", + "lastName": "Awesome" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e421328ccd154001772603b", + "email": "pseudo.randsome@gmail.clm", + "currentRole": "Engineer", + "desiredRole": "God", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-11T02:36:24.529Z", + "__v": 0 + }, + { + "name": { + "firstName": "chichi", + "lastName": "hughes" + }, + "accessLevel": "admin", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5ea74d2720cec100179465cf", + "email": "chichi.hughes@gmail.com", + "currentRole": "UX/UI Designer", + "desiredRole": "UX/UI Designer", + "newMember": false, + "firstAttended": "APR 2020", + "createdDate": "2020-04-27T21:22:47.465Z", + "__v": 0, + "attendanceReason": "Open Data" + }, + { + 
"name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d777c20707783dc86d6101", + "email": "asim.radhat@gmail.comdfm34", + "currentRole": "zxc", + "desiredRole": "zxc", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:53:54.935Z", + "__v": 0 + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d95ead26878d41f48aa15f", + "email": "asim.rhalskdfm10923knzdst@gmail.com", + "currentRole": "asldkm", + "desiredRole": "sdlkfm", + "newMember": true, + "createdDate": "2021-06-28T05:31:25.482Z", + "__v": 0 + }, + { + "name": { + "firstName": "Glen", + "lastName": "Steven" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "61523ca806ddd96310eb3e49", + "email": "kpop51367@bts123.net", + "currentRole": "Test", + "desiredRole": "Test", + "newMember": true, + "firstAttended": "JAN 2021", + "createdDate": "2021-01-27T21:50:32.255Z", + "__v": 0 + }, + { + "name": { + "firstName": "Glen", + "lastName": "Steven" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": true, + "managedProjects": [], + "isActive": true, + "_id": "61523fa80e0025656e23371d", + "email": "KPOP51367@BTS123.net", + "currentRole": "Test", + "desiredRole": "Test", + "newMember": true, + "firstAttended": "SEP 2021", + "createdDate": "2021-09-27T22:03:20.465Z", + "__v": 0 + }, + { + "name": { + "firstName": "Arthur", + "lastName": "Doyle" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "615cbd3a18c36c001e7b03b1", + "email": "Doyle@atemybread.com", + 
"currentRole": "baker", + "desiredRole": "better baker", + "newMember": true, + "firstAttended": "OCT 2021", + "createdDate": "2021-10-05T21:01:46.941Z", + "__v": 0 + }, + { + "name": { + "firstName": "rare", + "lastName": "person" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "61947ec1ef53983193f54ae7", + "email": "Rareisreal@fm.com", + "currentRole": "aerga", + "desiredRole": "arga", + "newMember": true, + "firstAttended": "NOV 2021", + "createdDate": "2021-11-17T04:02:09.726Z", + "__v": 0 + }, + { + "name": { + "firstName": "Glen", + "lastName": "Steven" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "6208327f7da35707d018e13c", + "email": "Stevedude73842@pet.com", + "currentRole": "Test", + "desiredRole": "Test", + "newMember": false, + "firstAttended": "FEB 2022", + "createdDate": "2022-02-12T22:19:43.301Z", + "__v": 0 + }, + { + "name": { + "firstName": "Shaun", + "lastName": "Murphy" + }, + "accessLevel": "admin", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [ + "5edeac78ce228b001778facd", + "5edea961ce228b001778faca", + "620859fa46807266c6cbcf20", + "6205ceb5aaf98b0021b3c204", + "619af6c86a8afa609cd5c419" + ], + "isActive": true, + "_id": "620840ad46807266c6cbcf1e", + "email": "smurf@gmail.com", + "currentRole": "dev", + "desiredRole": "dev", + "newMember": false, + "firstAttended": "FEB 2022", + "createdDate": "2022-02-12T23:20:13.331Z", + "__v": 0 + }, + { + "name": { + "firstName": "Tommy", + "lastName": "Smith" + }, + "accessLevel": "admin", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "633b9a74d98663001f8b5c46", + "email": "tommy@thatsfabulous.com", + "currentRole": "Supreme Leader", + "desiredRole": "Front end developer", + "newMember": false, + 
"firstAttended": "OCT 2022", + "createdDate": "2022-10-04T02:29:08.363Z", + "__v": 0 + }, + { + "name": { + "firstName": "Testing", + "lastName": "test12" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "645986720659c772705c6bcb", + "email": "testrn@gmail.com", + "currentRole": "Test", + "desiredRole": "Test", + "newMember": true, + "firstAttended": "MAY 2023", + "createdDate": "2023-05-08T23:32:02.575Z", + "__v": 0 + }, + { + "name": { + "firstName": "Enola", + "lastName": "Holmes" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1e764c0ab172001790f804", + "email": "enola@gmail.com", + "currentRole": "front-end web dev", + "desiredRole": "unsure", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-15T02:17:48.287Z", + "__v": 0, + "attendanceReason": "Open Data" + }, + { + "name": { + "firstName": "Phoebe", + "lastName": "Phoenix" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e54818892b678001759b45a", + "email": "phoenixcodes@gmail.com", + "currentRole": "Freelance Fullstack JS Dev", + "desiredRole": "Fullstack JS Dev", + "newMember": false, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-25T02:08:08.775Z", + "__v": 0, + "attendanceReason": "Social Justice/Equity", + "currentProject": "VRMS" + }, + { + "name": { + "firstName": "Abby", + "lastName": "Jordan" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5f33508b0d62580017d78228", + "email": "hippyjordan@gmail.com", + "currentRole": "Product Manager", + "desiredRole": "Product Manager", + "newMember": false, + "firstAttended": "AUG 2020", + "createdDate": "2020-08-12T02:14:35.725Z", + 
"__v": 0 + }, + { + "name": { + "firstName": "Susan", + "lastName": "Lee" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e8bd28080fe360017694654", + "email": "xoxosusy@gmail.com", + "currentRole": "Student", + "desiredRole": "Software Engineer", + "newMember": false, + "firstAttended": "NOV 2019", + "createdDate": "2020-04-07T01:08:16.639Z", + "__v": 0 + }, + { + "name": { + "firstName": "Calvin ", + "lastName": "Klien" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5ed84b1bb68bcc00179b27de", + "email": "info@ck.com", + "currentRole": "PHP Developer", + "desiredRole": "CEO", + "newMember": true, + "firstAttended": "JUN 2020", + "createdDate": "2020-06-04T01:15:07.602Z", + "__v": 0 + }, + { + "name": { + "firstName": "Sophia", + "lastName": "Yang" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e1e74390ab172001790f7ec", + "email": "slice212@gmail.com", + "currentRole": "applying", + "desiredRole": "Data Science", + "newMember": false, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-15T02:08:57.796Z", + "__v": 0, + "attendanceReason": "Homelessness", + "currentProject": "Host Home" + }, + { + "name": { + "firstName": "Aston", + "lastName": "Martin" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5ebdfa8005963b0017c6dd5c", + "email": "rickandmorty@gmail.com", + "currentRole": "Data Analyst/Scientist", + "desiredRole": "Data Analyst/Scientist", + "newMember": false, + "firstAttended": "MAY 2020", + "createdDate": "2020-05-15T02:12:16.545Z", + "__v": 0 + }, + { + "name": { + "firstName": "Selena", + "lastName": "Gomez" + }, + "accessLevel": "user", + 
"skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e27ae4b4530cd0017eee422", + "email": "selena.gomez@gmail.com", + "currentRole": "SW Engineer", + "desiredRole": "Developer", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-01-22T02:07:07.734Z", + "__v": 0 + }, + { + "name": { + "firstName": "Amrit", + "lastName": "Mann" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e8bd1aa80fe36001769464d", + "email": "amritmann@gmail.com", + "currentRole": "Dev", + "desiredRole": "Dev", + "newMember": false, + "firstAttended": "APR 2020", + "createdDate": "2020-04-07T01:04:42.306Z", + "__v": 0, + "attendanceReason": "Open Data" + }, + { + "name": { + "firstName": "Jennifer", + "lastName": "Winget" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e4364fdef67e100175c1eff", + "email": "jenny@valorie.com", + "currentRole": "Civic Engagement", + "desiredRole": "Dev Ops", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-12T02:37:49.626Z", + "__v": 0 + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d7772c0707783dc86d60ff", + "email": "6k1.nyarly636@gmail.com", + "currentRole": "sdfasd", + "desiredRole": "asdasd", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:51:24.004Z", + "__v": 0 + }, + { + "name": { + "firstName": "Glen", + "lastName": "Clark" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "61440e3218c36c001e7b03b0", + "email": "xoxoclark@debate.com", + 
"currentRole": "Test", + "desiredRole": "Test", + "newMember": true, + "firstAttended": "SEP 2021", + "createdDate": "2021-09-17T03:40:34.500Z", + "__v": 0 + }, + { + "name": { + "firstName": "Glen", + "lastName": "Clark" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "6157d8d979effa437122d670", + "email": "XOXOclark@debate.com", + "currentRole": "Test", + "desiredRole": "Test", + "newMember": true, + "firstAttended": "OCT 2021", + "createdDate": "2021-10-02T03:58:17.700Z", + "__v": 0 + }, + { + "name": { + "firstName": "Larry", + "lastName": "Page" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e435ce7ef67e100175c1ee3", + "email": "Larry@LarryPage.com", + "currentRole": "Student", + "desiredRole": "Python Developer", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-12T02:03:19.149Z", + "__v": 0, + "attendanceReason": "Homelessness", + "currentProject": "Food Oasis" + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d7765b7eec843150f7cd7d", + "email": "654k1.nyarly636@gmail.com", + "currentRole": "sdfasd", + "desiredRole": "asdasd", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:47:55.487Z", + "__v": 0 + }, + { + "name": { + "firstName": "Shark", + "lastName": "User" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "6101db234815993498437084", + "email": "WE@sharktank.com", + "currentRole": "af", + "desiredRole": "argfg", + "newMember": true, + "firstAttended": "JUL 2021", + "createdDate": "2021-07-28T22:33:07.120Z", + "__v": 0 + }, + { + 
"name": { + "firstName": "JASON", + "lastName": "YANG" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e38d1568d52770017ae8a86", + "email": "JASC68.JYANG@GMAIL.COM", + "currentRole": "DATA SCIENTIST", + "desiredRole": "DATA SCIENTIST", + "newMember": false, + "firstAttended": "SEP 2019", + "createdDate": "2020-02-04T02:05:10.999Z", + "__v": 0 + }, + { + "name": { + "firstName": "Jonathon", + "lastName": "Dooley" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5f0540b24f31cf00174d7538", + "email": "DooleyJM0902@student.laccd.edu", + "currentRole": "Student", + "desiredRole": "Developer", + "newMember": false, + "firstAttended": "JUL 2020", + "createdDate": "2020-07-08T03:42:42.621Z", + "__v": 0 + }, + { + "name": { + "firstName": "Jason", + "lastName": "Yang" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e66e6fcbe3e0b001761a814", + "email": "jasc68.jyang@gmail.com", + "currentRole": "data scientist", + "desiredRole": "data scientist", + "newMember": false, + "firstAttended": "SEP 2019", + "createdDate": "2020-03-10T01:01:48.651Z", + "__v": 0, + "attendanceReason": "Open Data", + "currentProject": "None" + }, + { + "name": { + "firstName": "Julia", + "lastName": "Fong" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [ + "5ec326c7a973810017c0de0c", + "619af6c86a8afa609cd5c419" + ], + "isActive": true, + "_id": "5e435911ef67e100175c1ecb", + "email": "Juliamzfong@gmail.com", + "currentRole": "fellow", + "desiredRole": "front-end developer", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-12T01:46:57.788Z", + "__v": 0 + }, + { + "name": { + "firstName": "RYAN", + "lastName": 
"COLLINS" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e38d10a8d52770017ae8a81", + "email": "RMCOLLINS95@GMAIL.COM", + "currentRole": "NONE", + "desiredRole": "DATA SCIENTIST", + "newMember": true, + "firstAttended": "JAN 2020", + "createdDate": "2020-02-04T02:03:54.970Z", + "__v": 0 + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d775d47eec843150f7cd7b", + "email": "6541.ny636fm@gmail.com", + "currentRole": "sdfasd", + "desiredRole": "asdasd", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:45:40.564Z", + "__v": 0 + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d76d8a7eec843150f7cd6c", + "email": "olafake908@gmail.com", + "currentRole": "asd", + "desiredRole": "asd", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:10:18.494Z", + "__v": 0 + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d775727eec843150f7cd76", + "email": "6541.ny619@gmail.com", + "currentRole": "sdfasd", + "desiredRole": "asdasd", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T18:44:02.536Z", + "__v": 0 + }, + { + "name": { + "firstName": "Bojack", + "lastName": "Horseman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "64163f87ffac46b48a4abc87", + "email": "bojack@gmail.com", + "currentRole": 
"horse", + "desiredRole": "man", + "newMember": true, + "firstAttended": "MAR 2023", + "createdDate": "2023-03-18T22:47:35.138Z", + "__v": 0 + }, + { + "name": { + "firstName": "Chris", + "lastName": "Schmitz" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "5e30ee120b9d2300177d3b38", + "email": "Christopher.A.Schmitz@gmail.com", + "currentRole": "Mobile Engineering Manager", + "desiredRole": "Mobile Engineering Manager", + "newMember": false, + "firstAttended": "MAY 2017", + "createdDate": "2020-01-29T02:29:38.664Z", + "__v": 0, + "attendanceReason": "Civic Engagement", + "currentProject": "None" + }, + { + "name": { + "firstName": "asim", + "lastName": "rahman" + }, + "accessLevel": "user", + "skillsToMatch": [], + "projects": [], + "textingOk": false, + "managedProjects": [], + "isActive": true, + "_id": "60d77e950707783dc86d6103", + "email": "234asim.radhat@gmail.com34a", + "currentRole": "sdf", + "desiredRole": "sdf", + "newMember": true, + "firstAttended": "JUN 2021", + "createdDate": "2021-06-26T19:23:01.866Z", + "__v": 0 + } + ] \ No newline at end of file From 2c09b8d87ee03bc1dd3351318a81c9dff1ce11f7 Mon Sep 17 00:00:00 2001 From: pluto <99231462+pluto-bell@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:35:20 -0800 Subject: [PATCH 03/12] .toLowerString() added --- client/src/components/auth/Auth.jsx | 4 ++-- client/src/components/dashboard/AddTeamMember.jsx | 2 +- client/src/components/presentational/newUserForm.jsx | 2 +- client/src/components/presentational/returnUserForm.jsx | 2 +- client/src/pages/CheckInForm.jsx | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/client/src/components/auth/Auth.jsx b/client/src/components/auth/Auth.jsx index e0427795c..908b0b9be 100644 --- a/client/src/components/auth/Auth.jsx +++ b/client/src/components/auth/Auth.jsx @@ -76,7 +76,7 @@ const Auth = () => { }; function handleInputChange(e) { - const 
inputValue = e.currentTarget.value.toString(); + const inputValue = e.currentTarget.value.toString().toLowerCase(); validateEmail(); if (!inputValue) { setIsDisabled(true); @@ -84,7 +84,7 @@ const Auth = () => { } else { setIsDisabled(false); setIsError(false); - setEmail(e.currentTarget.value.toString()); + setEmail(e.currentTarget.value.toString().toLowerCase()); } } diff --git a/client/src/components/dashboard/AddTeamMember.jsx b/client/src/components/dashboard/AddTeamMember.jsx index 61e8143d8..d0b1fb1d4 100644 --- a/client/src/components/dashboard/AddTeamMember.jsx +++ b/client/src/components/dashboard/AddTeamMember.jsx @@ -4,7 +4,7 @@ import "../../sass/AddTeamMember.scss"; const AddTeamMember = (props) => { const [email, setEmail] = useState(""); - const handleInputChange = (e) => setEmail(e.currentTarget.value); + const handleInputChange = (e) => setEmail(e.currentTarget.value.toLowerCase()); return (
diff --git a/client/src/components/presentational/newUserForm.jsx b/client/src/components/presentational/newUserForm.jsx index 1d86acf3f..178be15fb 100644 --- a/client/src/components/presentational/newUserForm.jsx +++ b/client/src/components/presentational/newUserForm.jsx @@ -63,7 +63,7 @@ const NewUserForm = (props) => { type="email" name="email" placeholder="Email Address" - value={props.formInput.email.toString()} + value={props.formInput.email.toString().toLowerCase()} onChange={props.handleInputChange} aria-label="Email Address" required diff --git a/client/src/components/presentational/returnUserForm.jsx b/client/src/components/presentational/returnUserForm.jsx index bde529317..f0c8f4259 100644 --- a/client/src/components/presentational/returnUserForm.jsx +++ b/client/src/components/presentational/returnUserForm.jsx @@ -21,7 +21,7 @@ const ReturnUserForm = (props) => { type="email" name="email" placeholder="Email Address" - value={props.formInput.email.toString()} + value={props.formInput.email.toString().toLowerCase()} onChange={props.handleInputChange} aria-label="Email Address" required diff --git a/client/src/pages/CheckInForm.jsx b/client/src/pages/CheckInForm.jsx index 4d80a61eb..7dc6be26e 100644 --- a/client/src/pages/CheckInForm.jsx +++ b/client/src/pages/CheckInForm.jsx @@ -307,7 +307,7 @@ const CheckInForm = (props) => { const currMonth = parseInt(moment().format('MM')); const yearJoined = parseInt(year); const monthJoined = parseInt(moment(month + ' 9, 2020').format('MM')); - + if ( yearJoined > currYear || (yearJoined === currYear && monthJoined > currMonth) From 2a4b402f0e90d3add3c506a43c4eb66160ea05c6 Mon Sep 17 00:00:00 2001 From: pluto <99231462+pluto-bell@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:48:58 -0800 Subject: [PATCH 04/12] validate email addition --- client/src/pages/CheckInForm.jsx | 7 +++++++ client/src/utils/validateEmail.js | 4 ++++ 2 files changed, 11 insertions(+) create mode 100644 client/src/utils/validateEmail.js 
diff --git a/client/src/pages/CheckInForm.jsx b/client/src/pages/CheckInForm.jsx index 7dc6be26e..024063e75 100644 --- a/client/src/pages/CheckInForm.jsx +++ b/client/src/pages/CheckInForm.jsx @@ -4,6 +4,7 @@ import NewUserForm from './../components/presentational/newUserForm'; import ReturnUserForm from './../components/presentational/returnUserForm'; import { REACT_APP_CUSTOM_REQUEST_HEADER as headerToSend } from '../utils/globalSettings'; import { format } from 'date-fns'; +import { validateEmail } from '../utils/validateEmail' import '../sass/CheckIn.scss'; @@ -303,6 +304,12 @@ const CheckInForm = (props) => { ready = false; } + if (userForm.email.length > 0 && !validateEmail(userForm.email) ) { + setIsError(true); + setErrorMessage("Please use a valid email address"); + ready = false; + } + const currYear = parseInt(moment().format('YYYY')); const currMonth = parseInt(moment().format('MM')); const yearJoined = parseInt(year); diff --git a/client/src/utils/validateEmail.js b/client/src/utils/validateEmail.js new file mode 100644 index 000000000..ff78a111b --- /dev/null +++ b/client/src/utils/validateEmail.js @@ -0,0 +1,4 @@ +export const validateEmail = (email) => { + const re = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; + return re.test(email); + } \ No newline at end of file From 66f933924c5109f5704273066fe75c71030483d5 Mon Sep 17 00:00:00 2001 From: pluto <99231462+pluto-bell@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:00:27 -0800 Subject: [PATCH 05/12] removed validate email, read note in pr --- client/src/pages/CheckInForm.jsx | 7 ------- client/src/utils/validateEmail.js | 4 ---- 2 files changed, 11 deletions(-) delete mode 100644 client/src/utils/validateEmail.js diff --git a/client/src/pages/CheckInForm.jsx b/client/src/pages/CheckInForm.jsx index 024063e75..7dc6be26e 100644 --- a/client/src/pages/CheckInForm.jsx +++ b/client/src/pages/CheckInForm.jsx @@ -4,7 +4,6 @@ import NewUserForm from './../components/presentational/newUserForm'; import ReturnUserForm 
from './../components/presentational/returnUserForm'; import { REACT_APP_CUSTOM_REQUEST_HEADER as headerToSend } from '../utils/globalSettings'; import { format } from 'date-fns'; -import { validateEmail } from '../utils/validateEmail' import '../sass/CheckIn.scss'; @@ -304,12 +303,6 @@ const CheckInForm = (props) => { ready = false; } - if (userForm.email.length > 0 && !validateEmail(userForm.email) ) { - setIsError(true); - setErrorMessage("Please use a valid email address"); - ready = false; - } - const currYear = parseInt(moment().format('YYYY')); const currMonth = parseInt(moment().format('MM')); const yearJoined = parseInt(year); diff --git a/client/src/utils/validateEmail.js b/client/src/utils/validateEmail.js deleted file mode 100644 index ff78a111b..000000000 --- a/client/src/utils/validateEmail.js +++ /dev/null @@ -1,4 +0,0 @@ -export const validateEmail = (email) => { - const re = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; - return re.test(email); - } \ No newline at end of file From 2975599b1ec29916ef8ab84d3fbfed692643c39e Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Fri, 11 Jul 2025 14:33:33 -0700 Subject: [PATCH 06/12] Enforce that emails can only be stored in lowercase --- backend/models/user.model.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/models/user.model.js b/backend/models/user.model.js index 4c2005cc6..6273320ff 100644 --- a/backend/models/user.model.js +++ b/backend/models/user.model.js @@ -8,7 +8,7 @@ const userSchema = mongoose.Schema({ firstName: { type: String }, lastName: { type: String }, }, - email: { type: String, unique: true }, + email: { type: String, unique: true, lowercase: true }, accessLevel: { type: String, enum: ["user", "admin", "superadmin"], // restricts values to "user", "admin" and "superadmin" From 6631aca921c02366a8acfea236a93f0df6fd21fd Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Mon, 14 Jul 2025 10:34:03 -0700 Subject: [PATCH 07/12] Implement script to identify problem users --- 
backend/scripts/python/env/.gitignore | 1 + .../python/env/Duplicate Removal.ipynb | 247 ++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 backend/scripts/python/env/.gitignore create mode 100644 backend/scripts/python/env/Duplicate Removal.ipynb diff --git a/backend/scripts/python/env/.gitignore b/backend/scripts/python/env/.gitignore new file mode 100644 index 000000000..150f68c80 --- /dev/null +++ b/backend/scripts/python/env/.gitignore @@ -0,0 +1 @@ +*/* diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb new file mode 100644 index 000000000..2b6737a0d --- /dev/null +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -0,0 +1,247 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 22, + "id": "bf6a5708-01b8-4439-b085-996a0b9309df", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import re\n", + "from datetime import datetime\n", + "\n", + "load_dotenv()\n", + "custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3387bab6-f11c-47c2-9c7a-def83999f50e", + "metadata": {}, + "outputs": [], + "source": [ + "USER_API_URL = 'http://localhost:3000/api/users'\n", + "HEADERS = { \"x-customrequired-header\": custom_request_header }" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2345b8e1-5601-4852-89c0-7e01f1b15e04", + "metadata": {}, + "outputs": [], + "source": [ + "# Get a List of all users\n", + "r = requests.get(USER_API_URL, headers=HEADERS)\n", + "users = json.loads(r.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0fb71fe2-9976-4d24-b308-d9f511d01192", + "metadata": {}, + "outputs": [], + "source": [ + "user_dict = {}\n", + "for user in users:\n", + " user_dict[user['_id']] = user" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "id": "8c5646cc-e708-4212-b280-d56711d71f64", + "metadata": {}, + "outputs": [], + "source": [ + "def identify_problem_users(users):\n", + " users_with_capital_emails = [user for user in users if re.compile('[A-Z]').search(user['email'])]\n", + "\n", + " potential_duplicate_emails = set([user['email'].lower() for user in users_with_capital_emails])\n", + "\n", + " problem_users = {}\n", + " for user in users:\n", + " current_email = user['email'].lower()\n", + " if current_email in potential_duplicate_emails:\n", + " if problem_users.get(current_email, None) is not None:\n", + " problem_users[current_email].append(user['_id'])\n", + " else:\n", + " problem_users[current_email] = [user['_id']]\n", + "\n", + " non_duped_capital_emails_with_ids = set([(email, problem_users[email][0]) for email in problem_users.keys() if len(problem_users[email]) == 1])\n", + "\n", + " for email, user_id in non_duped_capital_emails_with_ids:\n", + " problem_users.pop(email)\n", + "\n", + " return problem_users, non_duped_capital_emails_with_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "feae3b2e-8bb8-43d2-9056-5db90b44708f", + "metadata": {}, + "outputs": [], + "source": [ + "duped_emails, non_duped_capital_emails_with_ids = identify_problem_users(users)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8f0c346-9ac5-4e05-b3d7-25e68d6cbcf7", + "metadata": {}, + "outputs": [], + "source": [ + "duped_emails" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b5d62f5-3625-4693-9471-8d49e7f4b6fe", + "metadata": {}, + "outputs": [], + "source": [ + "non_duped_capital_emails_with_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dba2cc1d-2df2-4322-a9c8-dc7351e9a1ac", + "metadata": {}, + "outputs": [], + "source": [ + "def update_user_email(email, user_id):\n", + " r = requests.patch(USER_API_URL + '/' + user_id, json={'email': email}, headers=HEADERS)\n", + " 
print(r.content)\n", + " \n", + "def fix_non_duped_capital_emails(emails_with_ids):\n", + " for email, user_id in emails_with_ids:\n", + " print(email, user_id)\n", + " update_user_email(user_id, email)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "297a5ba1-3b0e-402a-86b4-608f47004eea", + "metadata": {}, + "outputs": [], + "source": [ + "users[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "040b08df-fb94-4646-8811-a8da2235025c", + "metadata": {}, + "outputs": [], + "source": [ + "def determine_canonical_id_for_duped_email(duped_email_ids, user_dict):\n", + " canonical_id = ''\n", + " oldest_creation_date = datetime.now()\n", + " for user_id in duped_email_ids:\n", + " current_creation_date = datetime.strptime(user_dict[user_id]['createdDate'], '%Y-%m-%dT%H:%M:%S.%fZ')\n", + " if current_creation_date < oldest_creation_date:\n", + " oldest_creation_date = current_creation_date\n", + " canonical_id = user_id\n", + "\n", + " return canonical_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23438283-6454-40d4-b6fc-e64bb5358642", + "metadata": {}, + "outputs": [], + "source": [ + "determine_canonical_id_for_duped_email(duped_emails['dannyprikaz@gmail.com'], user_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277f6b1e-8869-4808-83d2-3046f770cd17", + "metadata": {}, + "outputs": [], + "source": [ + "duped_emails" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "0e68adcf-39a0-4b5a-a7e6-adf67305e624", + "metadata": {}, + "outputs": [], + "source": [ + "r = requests.get('http://localhost:3000/api/projectteammembers', headers=HEADERS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aab2c929-bc98-4683-9812-bd03fdfa837c", + "metadata": {}, + "outputs": [], + "source": [ + "r.content" + ] + }, + { + "cell_type": "markdown", + "id": "b14a845d-5e6f-457f-b828-ae838d431633", + "metadata": {}, + "source": [ + "# We need to 
find all of the other documents in the database that reference the duplicated users.\n", + "\n", + "### Database objects that reference User Ids:\n", + "- `checkIn`: checkIns have a userId and an eventId, but the API does not expose endpoints for deleting or updating checkIns. We could keep the IDs of every checkIn with a duplicated user and run them through a script that directly accesses the mongooes object, or we could attempt to access Mongo directly in this script\n", + "- `event`: events have a field for owner with an ownerId, which is supposed to be \"id of user who created event.\" None of the events in the database have the owner field filled in.\n", + "- `project`: projects have a field called `managedByUsers`. For most documents in the database, this is an empty array. For the ones where it is not an empty array, it does not contain any valid userIds.\n", + "- `projectTeamMember`: This seems like it should have plenty of data containing userIds, but there are no projectTeamMember documents in the database\n", + "- `recurringEvent`: Similar to event, this has a field for owner with ownerId. 
It is set to default to '123456', and there are no documents in the database that don't have that value.\n", + "- " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75612bd3-2e6a-4198-9b1c-5732bd7451c2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5017c2e3587b6d5af6c20419ae1bfe3957c2222f Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Mon, 28 Jul 2025 19:24:03 -0700 Subject: [PATCH 08/12] Add method for merging duplicate users --- backend/scripts/python/env/.gitignore | 3 +- .../python/env/Duplicate Removal.ipynb | 176 +++++++++++++++--- 2 files changed, 153 insertions(+), 26 deletions(-) diff --git a/backend/scripts/python/env/.gitignore b/backend/scripts/python/env/.gitignore index 150f68c80..f514b74c5 100644 --- a/backend/scripts/python/env/.gitignore +++ b/backend/scripts/python/env/.gitignore @@ -1 +1,2 @@ -*/* +# Created by venv; see https://docs.python.org/3/library/venv.html +* diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb index 2b6737a0d..4807f58b1 100644 --- a/backend/scripts/python/env/Duplicate Removal.ipynb +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 22, + "execution_count": 42, "id": "bf6a5708-01b8-4439-b085-996a0b9309df", "metadata": {}, "outputs": [], @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 43, "id": "3387bab6-f11c-47c2-9c7a-def83999f50e", "metadata": {}, "outputs": [], @@ -31,7 
+31,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, "id": "2345b8e1-5601-4852-89c0-7e01f1b15e04", "metadata": {}, "outputs": [], @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 45, "id": "0fb71fe2-9976-4d24-b308-d9f511d01192", "metadata": {}, "outputs": [], @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 46, "id": "8c5646cc-e708-4212-b280-d56711d71f64", "metadata": {}, "outputs": [], @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 47, "id": "feae3b2e-8bb8-43d2-9056-5db90b44708f", "metadata": {}, "outputs": [], @@ -94,27 +94,70 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "id": "f8f0c346-9ac5-4e05-b3d7-25e68d6cbcf7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'atkendis@gmail.com': ['5e27b1e54530cd0017eee431',\n", + " '5e30ee300b9d2300177d3b3a'],\n", + " 'eli.j.selkin@gmail.com': ['5e7965101e29ad00179399bb',\n", + " '5e7024f88e1bee00178aa1e0'],\n", + " 'jasc68.jyang@gmail.com': ['5e38d1568d52770017ae8a86',\n", + " '5e66e6fcbe3e0b001761a814'],\n", + " 'trillium@hatsfabulous.com': ['5e965e554e2fc70017aa3970',\n", + " '633b9a74d98663001f8b5c46'],\n", + " 'dannyprikaz@gmail.com': ['678f122e4c6f61002a1e5e68',\n", + " '6871afe1e6bf590aede8f8da',\n", + " '6871b00ee6bf590aede8f8db']}" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "duped_emails" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "id": "7b5d62f5-3625-4693-9471-8d49e7f4b6fe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{('acanderson358@gmail.com', '5e4c995eb73a2a001732f47e'),\n", + " ('akib.rhasast@gmail.com', '60d6a4e82e675e4a90e9ca92'),\n", + " ('christopher.a.schmitz@gmail.com', '5e30ee120b9d2300177d3b38'),\n", + " ('dannydaekim@gmail.com', 
'5f28c25206f21000177e690a'),\n", + " ('dedre@sharklasers.com', '6101d94c4815993498437083'),\n", + " ('dpease@dataminr.com', '5e1d2283316d2f00172ef05a'),\n", + " ('jbarib@gmail.com', '5e27abf74530cd0017eee417'),\n", + " ('juliamzfong@gmail.com', '5e435911ef67e100175c1ecb'),\n", + " ('kphowley@gmail.com', '5e4c9b59b73a2a001732f487'),\n", + " ('rmcollins95@gmail.com', '5e38d10a8d52770017ae8a81'),\n", + " ('scott@scottlarsen.com', '5e435ce7ef67e100175c1ee3'),\n", + " ('tywe@sharklasers.com', '6101db234815993498437084')}" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "non_duped_capital_emails_with_ids" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 50, "id": "dba2cc1d-2df2-4322-a9c8-dc7351e9a1ac", "metadata": {}, "outputs": [], @@ -126,7 +169,7 @@ "def fix_non_duped_capital_emails(emails_with_ids):\n", " for email, user_id in emails_with_ids:\n", " print(email, user_id)\n", - " update_user_email(user_id, email)\n" + " update_user_email(user_id, email)" ] }, { @@ -141,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 51, "id": "040b08df-fb94-4646-8811-a8da2235025c", "metadata": {}, "outputs": [], @@ -160,42 +203,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "id": "23438283-6454-40d4-b6fc-e64bb5358642", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'678f122e4c6f61002a1e5e68'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "determine_canonical_id_for_duped_email(duped_emails['dannyprikaz@gmail.com'], user_dict)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "277f6b1e-8869-4808-83d2-3046f770cd17", + "execution_count": 34, + "id": "b2e9128e-b338-4456-ae0d-8772621536dd", "metadata": {}, "outputs": [], "source": [ - "duped_emails" + "def merge_users(canonical_user_id, user_id_2, user_dict):\n", + " 
canonical_user = user_dict[canonical_user_id]\n", + " duplicate_user = user_dict[user_id_2]\n", + "\n", + " # For any list fields, combine them\n", + " list_fields = ['skillsToMatch', 'projects', 'managedProjects']\n", + " for field in list_fields:\n", + " canonical_user[field] += duplicate_user[field]\n", + "\n", + " # For boolean fields, set to true if either is true\n", + " bool_fields = ['textingOk', 'isActive', 'newMember']\n", + " for field in bool_fields:\n", + " canonical_user[field] = canonical_user[field] or duplicate_user[field]\n", + "\n", + " # For fields about roles, take the most recent information\n", + " take_the_newer_fields = ['currentRole', 'desiredRole']\n", + " for field in take_the_newer_fields:\n", + " if len(duplicate_user.get(field, '')) > 0:\n", + " canonical_user[field] = duplicat_user[field]\n", + "\n", + " # Take the highest access level\n", + " access_level = ['user', 'admin', 'superadmin']\n", + " highest_access_level = max(access_level.index(canonical_user['accessLevel']), access_level.index(duplicate_user['accessLevel']))\n", + " canonical_user['accessLevel'] = access_level[highest_access_level]\n", + "\n", + " # Make user that email is all lower case\n", + " canonical_user['email'] = canonical_user['email'].lower()\n", + " \n", + " \n", + " print(canonical_user)" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "0e68adcf-39a0-4b5a-a7e6-adf67305e624", + "execution_count": 23, + "id": "8756f5eb-1b49-428b-aec7-677b14b3cb04", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': {'firstName': 'Danny', 'lastName': 'Prikazsky'}, 'accessLevel': 'admin', 'skillsToMatch': [], 'projects': [], 'textingOk': False, 'managedProjects': [], 'isActive': True, '_id': '678f122e4c6f61002a1e5e68', 'email': 'dannyprikaz@gmail.com', 'currentRole': 'Full Stack Developer', 'desiredRole': 'Full Stack Developer', 'newMember': True, 'firstAttended': 'JAN 2025', 'createdDate': 
'2025-01-21T03:19:10.548Z', '__v': 0}\n" + ] + } + ], "source": [ - "r = requests.get('http://localhost:3000/api/projectteammembers', headers=HEADERS)" + "merge_users(duped_emails['dannyprikaz@gmail.com'][0], duped_emails['dannyprikaz@gmail.com'][1], user_dict)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "aab2c929-bc98-4683-9812-bd03fdfa837c", + "execution_count": 36, + "id": "32456806-58b9-4295-9f83-fd01f65741f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': {'firstName': 'Danny', 'lastName': 'Prikazsky'},\n", + " 'accessLevel': 'admin',\n", + " 'skillsToMatch': [],\n", + " 'projects': [],\n", + " 'textingOk': False,\n", + " 'managedProjects': [],\n", + " 'isActive': True,\n", + " '_id': '6871b00ee6bf590aede8f8db',\n", + " 'email': 'dannypRikaz@gmail.com',\n", + " 'currentRole': 'Full Stack Developer',\n", + " 'desiredRole': 'Full Stack Developer',\n", + " 'newMember': True,\n", + " 'firstAttended': 'JAN 2025',\n", + " 'createdDate': '2025-01-21T03:19:10.548Z',\n", + " '__v': 0}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_dict[duped_emails['dannyprikaz@gmail.com'][2]]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "0e68adcf-39a0-4b5a-a7e6-adf67305e624", "metadata": {}, "outputs": [], "source": [ - "r.content" + "r = requests.get('http://localhost:3000/api/projectteammembers', headers=HEADERS)" ] }, { From a6ff776917567b67dfec2dc38010a2c5952303f4 Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Sat, 2 Aug 2025 18:58:36 -0700 Subject: [PATCH 09/12] Add test to ensure user model sets emails to lowercase --- backend/models/user.model.test.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/models/user.model.test.js b/backend/models/user.model.test.js index 03d37f79e..ed802e2e9 100644 --- a/backend/models/user.model.test.js +++ b/backend/models/user.model.test.js @@ -96,6 +96,19 @@ 
describe('Unit tests for User Model', () => { expect(error.errors.accessLevel).toBeDefined(); }); + it('should enforce that emails are stored in lowercase', async () => { + // Create a mock user with an uppercase email + const uppercaseEmail = 'TEST@test.com'; + const mockUser = new User({ + email: uppercaseEmail, + }); + + mockUser.validate(); + // Tests + expect(mockUser.email).toBe(uppercaseEmail.toLowerCase()); + }); + + it('should pass validation with valid user data', async () => { // Create a mock user with valid data const mockUser = new User({ From 06ed72f9bcebf1a7ed3abf5fe7c14333e084c326 Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Mon, 4 Aug 2025 19:15:01 -0700 Subject: [PATCH 10/12] Add functionality to update checkins with new user_id --- .../python/env/Duplicate Removal.ipynb | 546 +++++++++++++----- backend/scripts/python/env/README.md | 69 +++ backend/scripts/python/env/requirements.txt | 104 ++++ 3 files changed, 582 insertions(+), 137 deletions(-) create mode 100644 backend/scripts/python/env/README.md create mode 100644 backend/scripts/python/env/requirements.txt diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb index 4807f58b1..b76a6810c 100644 --- a/backend/scripts/python/env/Duplicate Removal.ipynb +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -1,8 +1,20 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "377dcf28-dc41-416c-85bd-03c723ac73c5", + "metadata": {}, + "source": [ + "# Setup\n", + "\n", + "For dev, you must have the backend api running on your computer. For prod, please change USER_API_URL to reflect the production url.\n", + "\n", + "Please also configure the `x-custom-required-header` within your `.env` file to have the correct value." 
+ ] + }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 52, "id": "bf6a5708-01b8-4439-b085-996a0b9309df", "metadata": {}, "outputs": [], @@ -13,6 +25,7 @@ "import os\n", "import re\n", "from datetime import datetime\n", + "from functools import reduce\n", "\n", "load_dotenv()\n", "custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")" @@ -20,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 2, "id": "3387bab6-f11c-47c2-9c7a-def83999f50e", "metadata": {}, "outputs": [], @@ -29,9 +42,19 @@ "HEADERS = { \"x-customrequired-header\": custom_request_header }" ] }, + { + "cell_type": "markdown", + "id": "0c952dc5-c39e-4337-9043-16c1dbce38b3", + "metadata": {}, + "source": [ + "## Retrieve Users\n", + "\n", + "Retrieve a list of all users and the format it into a dictionary where users are hashed to their _id." + ] + }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 42, "id": "2345b8e1-5601-4852-89c0-7e01f1b15e04", "metadata": {}, "outputs": [], @@ -43,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 43, "id": "0fb71fe2-9976-4d24-b308-d9f511d01192", "metadata": {}, "outputs": [], @@ -53,9 +76,19 @@ " user_dict[user['_id']] = user" ] }, + { + "cell_type": "markdown", + "id": "e8658c5b-dc98-4954-befe-ddfc54668a25", + "metadata": {}, + "source": [ + "## Identify Capitalized Emails\n", + "\n", + "Create a function that identifies which users have capital characters in their email addresses. This function will return a dictionary of user ids hashed to the email that they all share called `duped_emails` and a set of tuples for capitalized emails addresses that don't have multiple user ids called `non_duped_capital_emails_with_ids`." 
+ ] + }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 5, "id": "8c5646cc-e708-4212-b280-d56711d71f64", "metadata": {}, "outputs": [], @@ -84,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 6, "id": "feae3b2e-8bb8-43d2-9056-5db90b44708f", "metadata": {}, "outputs": [], @@ -94,144 +127,105 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "f8f0c346-9ac5-4e05-b3d7-25e68d6cbcf7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'atkendis@gmail.com': ['5e27b1e54530cd0017eee431',\n", - " '5e30ee300b9d2300177d3b3a'],\n", - " 'eli.j.selkin@gmail.com': ['5e7965101e29ad00179399bb',\n", - " '5e7024f88e1bee00178aa1e0'],\n", - " 'jasc68.jyang@gmail.com': ['5e38d1568d52770017ae8a86',\n", - " '5e66e6fcbe3e0b001761a814'],\n", - " 'trillium@hatsfabulous.com': ['5e965e554e2fc70017aa3970',\n", - " '633b9a74d98663001f8b5c46'],\n", - " 'dannyprikaz@gmail.com': ['678f122e4c6f61002a1e5e68',\n", - " '6871afe1e6bf590aede8f8da',\n", - " '6871b00ee6bf590aede8f8db']}" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "duped_emails" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "id": "7b5d62f5-3625-4693-9471-8d49e7f4b6fe", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{('acanderson358@gmail.com', '5e4c995eb73a2a001732f47e'),\n", - " ('akib.rhasast@gmail.com', '60d6a4e82e675e4a90e9ca92'),\n", - " ('christopher.a.schmitz@gmail.com', '5e30ee120b9d2300177d3b38'),\n", - " ('dannydaekim@gmail.com', '5f28c25206f21000177e690a'),\n", - " ('dedre@sharklasers.com', '6101d94c4815993498437083'),\n", - " ('dpease@dataminr.com', '5e1d2283316d2f00172ef05a'),\n", - " ('jbarib@gmail.com', '5e27abf74530cd0017eee417'),\n", - " ('juliamzfong@gmail.com', '5e435911ef67e100175c1ecb'),\n", - " ('kphowley@gmail.com', '5e4c9b59b73a2a001732f487'),\n", - " 
('rmcollins95@gmail.com', '5e38d10a8d52770017ae8a81'),\n", - " ('scott@scottlarsen.com', '5e435ce7ef67e100175c1ee3'),\n", - " ('tywe@sharklasers.com', '6101db234815993498437084')}" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "non_duped_capital_emails_with_ids" ] }, + { + "cell_type": "markdown", + "id": "c762e074-04f9-4590-bef5-f4eaac0d3ac6", + "metadata": {}, + "source": [ + "## Fixing non-duped emails\n", + "\n", + "These functions will use the API to update user documents in the database that have an email with a capitalized character. To fix all such emails, run `fix_non_duped_capital_emails(non_duped_capital_emails_with_ids)`" + ] + }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "id": "dba2cc1d-2df2-4322-a9c8-dc7351e9a1ac", "metadata": {}, "outputs": [], "source": [ - "def update_user_email(email, user_id):\n", - " r = requests.patch(USER_API_URL + '/' + user_id, json={'email': email}, headers=HEADERS)\n", + "def update_user(user_id, user_data):\n", + " r = requests.patch(USER_API_URL + '/' + user_id, json=user_data, headers=HEADERS)\n", " print(r.content)\n", " \n", "def fix_non_duped_capital_emails(emails_with_ids):\n", " for email, user_id in emails_with_ids:\n", " print(email, user_id)\n", - " update_user_email(user_id, email)" + " update_user(user_id, {'email': email})" ] }, { "cell_type": "code", "execution_count": null, - "id": "297a5ba1-3b0e-402a-86b4-608f47004eea", + "id": "c8c5313c-05fd-4121-87ce-b4e175a112d6", "metadata": {}, "outputs": [], "source": [ - "users[0]" + "fix_non_duped_capital_emails(non_duped_capital_emails_with_ids)" + ] + }, + { + "cell_type": "markdown", + "id": "75a6a051-8622-49d4-9508-0e6ac0526d6f", + "metadata": {}, + "source": [ + "## Removing duplicate users\n", + "\n", + "These functions will determine which user for a set of users with the same email has the oldest `createdDate` and then merge duplicate users into the 
oldest user document for that email." ] }, { "cell_type": "code", - "execution_count": 51, - "id": "040b08df-fb94-4646-8811-a8da2235025c", + "execution_count": 47, + "id": "3f9e99a4-c67e-4109-913d-ee8cced7bb57", "metadata": {}, "outputs": [], "source": [ - "def determine_canonical_id_for_duped_email(duped_email_ids, user_dict):\n", - " canonical_id = ''\n", - " oldest_creation_date = datetime.now()\n", - " for user_id in duped_email_ids:\n", - " current_creation_date = datetime.strptime(user_dict[user_id]['createdDate'], '%Y-%m-%dT%H:%M:%S.%fZ')\n", - " if current_creation_date < oldest_creation_date:\n", - " oldest_creation_date = current_creation_date\n", - " canonical_id = user_id\n", + "# Sort ids for each duped email by oldest to newest\n", "\n", - " return canonical_id" + "for lowercase_email in duped_emails.keys():\n", + " duped_emails[lowercase_email].sort(key=(lambda _id: user_dict[_id]['createdDate']))" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "23438283-6454-40d4-b6fc-e64bb5358642", + "execution_count": 77, + "id": "6270f2b8-aa8d-446f-8ee8-7dcbac3a4cc7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'678f122e4c6f61002a1e5e68'" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "determine_canonical_id_for_duped_email(duped_emails['dannyprikaz@gmail.com'], user_dict)" + "ids_to_replace = {}" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 78, "id": "b2e9128e-b338-4456-ae0d-8772621536dd", "metadata": {}, "outputs": [], "source": [ - "def merge_users(canonical_user_id, user_id_2, user_dict):\n", - " canonical_user = user_dict[canonical_user_id]\n", - " duplicate_user = user_dict[user_id_2]\n", + "def merge_users(older_id, newer_id):\n", + " canonical_user = user_dict[older_id]\n", + " duplicate_user = user_dict[newer_id]\n", + " ids_to_replace[newer_id] = older_id\n", "\n", " # For any list fields, combine them\n", " 
list_fields = ['skillsToMatch', 'projects', 'managedProjects']\n", @@ -247,7 +241,7 @@ " take_the_newer_fields = ['currentRole', 'desiredRole']\n", " for field in take_the_newer_fields:\n", " if len(duplicate_user.get(field, '')) > 0:\n", - " canonical_user[field] = duplicat_user[field]\n", + " canonical_user[field] = duplicate_user[field]\n", "\n", " # Take the highest access level\n", " access_level = ['user', 'admin', 'superadmin']\n", @@ -258,92 +252,370 @@ " canonical_user['email'] = canonical_user['email'].lower()\n", " \n", " \n", - " print(canonical_user)" + " return older_id" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "8756f5eb-1b49-428b-aec7-677b14b3cb04", + "execution_count": 79, + "id": "98f86221-5f32-4a1f-8537-bb40e967b17a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'name': {'firstName': 'Danny', 'lastName': 'Prikazsky'}, 'accessLevel': 'admin', 'skillsToMatch': [], 'projects': [], 'textingOk': False, 'managedProjects': [], 'isActive': True, '_id': '678f122e4c6f61002a1e5e68', 'email': 'dannyprikaz@gmail.com', 'currentRole': 'Full Stack Developer', 'desiredRole': 'Full Stack Developer', 'newMember': True, 'firstAttended': 'JAN 2025', 'createdDate': '2025-01-21T03:19:10.548Z', '__v': 0}\n" - ] - } - ], + "outputs": [], "source": [ - "merge_users(duped_emails['dannyprikaz@gmail.com'][0], duped_emails['dannyprikaz@gmail.com'][1], user_dict)" + "for lowercase_email in duped_emails.keys():\n", + " reduce(merge_users, duped_emails[lowercase_email])\n", + " #correct_user_id = duped_emails[lower_case_email][0]\n", + " #update_user(correct_user_id, user_dict[correct_user_id])" ] }, { "cell_type": "code", - "execution_count": 36, - "id": "32456806-58b9-4295-9f83-fd01f65741f0", + "execution_count": null, + "id": "5d0c235b-b593-40e8-8190-745915529349", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": 
"2d6d014b-71c4-44a0-8247-8d10f87a2f1a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'name': {'firstName': 'Danny', 'lastName': 'Prikazsky'},\n", - " 'accessLevel': 'admin',\n", - " 'skillsToMatch': [],\n", - " 'projects': [],\n", - " 'textingOk': False,\n", - " 'managedProjects': [],\n", - " 'isActive': True,\n", - " '_id': '6871b00ee6bf590aede8f8db',\n", - " 'email': 'dannypRikaz@gmail.com',\n", - " 'currentRole': 'Full Stack Developer',\n", - " 'desiredRole': 'Full Stack Developer',\n", - " 'newMember': True,\n", - " 'firstAttended': 'JAN 2025',\n", - " 'createdDate': '2025-01-21T03:19:10.548Z',\n", - " '__v': 0}" + "{'633b9a74d98663001f8b5c46': '5e965e554e2fc70017aa3970',\n", + " '5e66e6fcbe3e0b001761a814': '5e38d1568d52770017ae8a86',\n", + " '5e30ee300b9d2300177d3b3a': '5e27b1e54530cd0017eee431',\n", + " '5e7965101e29ad00179399bb': '5e7024f88e1bee00178aa1e0',\n", + " '6871afe1e6bf590aede8f8da': '678f122e4c6f61002a1e5e68',\n", + " '6871b00ee6bf590aede8f8db': '678f122e4c6f61002a1e5e68'}" ] }, - "execution_count": 36, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "user_dict[duped_emails['dannyprikaz@gmail.com'][2]]" + "ids_to_replace" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "0e68adcf-39a0-4b5a-a7e6-adf67305e624", + "execution_count": 57, + "id": "78ccbbd6-7fea-4657-bf9a-850bd9e68505", "metadata": {}, "outputs": [], "source": [ - "r = requests.get('http://localhost:3000/api/projectteammembers', headers=HEADERS)" + "def delete_user(user_id):\n", + " r = requests.delete(USER_API_URL + '/' + user_id, headers=HEADERS)\n", + " print(r.content)" ] }, { - "cell_type": "markdown", - "id": "b14a845d-5e6f-457f-b828-ae838d431633", + "cell_type": "code", + "execution_count": 60, + "id": "5f4ec365-ab7c-45fe-bfbe-b35cdbb0ec94", "metadata": {}, + "outputs": [], "source": [ - "# We need to find all of the other documents in the database that reference the duplicated users.\n", - "\n", - "### 
Database objects that reference User Ids:\n", - "- `checkIn`: checkIns have a userId and an eventId, but the API does not expose endpoints for deleting or updating checkIns. We could keep the IDs of every checkIn with a duplicated user and run them through a script that directly accesses the mongooes object, or we could attempt to access Mongo directly in this script\n", - "- `event`: events have a field for owner with an ownerId, which is supposed to be \"id of user who created event.\" None of the events in the database have the owner field filled in.\n", - "- `project`: projects have a field called `managedByUsers`. For most documents in the database, this is an empty array. For the ones where it is not an empty array, it does not contain any valid userIds.\n", - "- `projectTeamMember`: This seems like it should have plenty of data containing userIds, but there are no projectTeamMember documents in the database\n", - "- `recurringEvent`: Similar to event, this has a field for owner with ownerId. 
It is set to default to '123456', and there are no documents in the database that don't have that value.\n", - "- " + "from pymongo import MongoClient\n", + "client = MongoClient(\"mongodb+srv://editor:557Ith3jq7ap2mnO@cluster0.5buwz.mongodb.net/vrms-test?retryWrites=true&w=majority\")" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "8556cf83-a412-4ee0-bfde-0447b63d6ac4", + "metadata": {}, + "outputs": [], + "source": [ + "db = client['vrms-test']" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "87d77a37-c404-4aa9-9b0a-7db74039db17", + "metadata": {}, + "outputs": [], + "source": [ + "col = db['checkins']" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "bcb71f5b-21d7-4929-a404-9771ba34cdfa", + "metadata": {}, + "outputs": [], + "source": [ + "checkins = col.find({'userId': {'$in': list(ids_to_replace.keys())}})" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "314dd491-6a2e-442e-8dd2-08ba9dc08f0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'_id': ObjectId('5f3f15e9e2779b0017bc88d8'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f3e81e2e2779b0017bc88d7',\n", + " 'createdDate': '2020-08-21T00:31:37.569Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e30ee300b9d2300177d3b3b'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e30daf244708e3726e15892',\n", + " 'createdDate': '2020-01-29T02:30:08.206Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e3a2a1360ee280017fee686'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e39c1143c5e1c83597507e6',\n", + " 'createdDate': '2020-02-05T02:36:03.953Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f2caef2be8892001782f57b'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f2c0ce284ecb10017ad1ae7',\n", + " 'createdDate': 
'2020-08-07T01:31:30.555Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e55da1df9819c00173fb2c2'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e55a3559f734cce1e7de80f',\n", + " 'createdDate': '2020-02-26T02:38:21.252Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e8d2bc2e48350001771acc6'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e8d1bb738f9b52492942030',\n", + " 'createdDate': '2020-04-08T01:41:22.376Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e4366e3ef67e100175c1f05'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e43532d10ea9f049c245b0c',\n", + " 'createdDate': '2020-02-12T02:45:55.105Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e6847beaee6af0017b3fa0c'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e68317c20f87d5df925b954',\n", + " 'createdDate': '2020-03-11T02:06:54.380Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e7025438e1bee00178aa1e4'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e66e6fcbe3e0b001761a814',\n", + " 'eventId': '5e701ba16d144781e897ec6a',\n", + " 'createdDate': '2020-03-17T01:17:55.140Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e5f184803a60700179ba4f6'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e5ec5cb8cea701b148fceae',\n", + " 'createdDate': '2020-03-04T02:54:00.467Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e4c9f4ab73a2a001732f493'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e4c7985ec7f0743c0db510e',\n", + " 'createdDate': '2020-02-19T02:36:58.842Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e7965101e29ad00179399bc'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e7965101e29ad00179399bb',\n", + " 'eventId': '5e794aef824fb04480af6959',\n", + " 'createdDate': '2020-03-24T01:40:32.769Z',\n", + " '__v': 
0},\n", + " {'_id': ObjectId('5e66e6fcbe3e0b001761a815'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e66e6fcbe3e0b001761a814',\n", + " 'eventId': '5e62b17b126c67427c5ce89f',\n", + " 'createdDate': '2020-03-10T01:01:48.845Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5e7ab614d84f53001763f366'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5e7aa5a0c01bb54cdd36065c',\n", + " 'createdDate': '2020-03-25T01:38:28.198Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ed70511aa37d600179e450d'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ed65b618e8b770017a9ce1e',\n", + " 'createdDate': '2020-06-03T02:04:01.552Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ec48f6d200f8b0017836d80'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ec42cb16b7a9a00172a553d',\n", + " 'createdDate': '2020-05-20T02:01:17.031Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ed0874dca666400174b1e8c'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ecfc3e1d6cbd800171847da',\n", + " 'createdDate': '2020-05-29T03:53:49.927Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ecdd407cef75e0017cf30f0'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ecd20e179d6dc0017ac7db8',\n", + " 'createdDate': '2020-05-27T02:44:23.211Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ec7321329fb2d00172635ab'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ec68961f504e90017fb11ff',\n", + " 'createdDate': '2020-05-22T01:59:47.999Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ebb4dd187aa6d00176227a6'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ebb300c950796fde6cbe700',\n", + " 'createdDate': '2020-05-13T01:30:57.239Z',\n", + " '__v': 0},\n", + " {'_id': 
ObjectId('5ee03c6feab2e60017c44a28'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5edf95e1eab2e60017c44a1a',\n", + " 'createdDate': '2020-06-10T01:50:39.600Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5eec1d8f411acc001748698d'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5eeb73616e32610017f67765',\n", + " 'createdDate': '2020-06-19T02:06:07.142Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ebdf93405963b0017c6dd5a'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ebde5cf0b0d433916a4225f',\n", + " 'createdDate': '2020-05-15T02:06:44.938Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5ee2e28b1ebf83001752b3ff'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5ee238e1895aad0017ee61b5',\n", + " 'createdDate': '2020-06-12T02:03:55.863Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5efbed24ff13eb00172d4408'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5efb4562e0e39f0017c07d65',\n", + " 'createdDate': '2020-07-01T01:55:48.023Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f07d61d4927d900172f2ffc'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f0722e13e45cb00176c72ca',\n", + " 'createdDate': '2020-07-10T02:44:45.363Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f0e65e204bebb0017213db0'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f0dba6204bebb0017213da1',\n", + " 'createdDate': '2020-07-15T02:11:46.994Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f11065fd4c79f0017d25c6e'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f105d62fdf82f0017e51aed',\n", + " 'createdDate': '2020-07-17T02:01:03.822Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f052ce54f31cf00174d7528'),\n", + " 
'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f047fe24adb5d001718c29f',\n", + " 'createdDate': '2020-07-08T02:18:13.140Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5efe90057ecb370017c91b7b'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5efde8622bbd0c00177f5149',\n", + " 'createdDate': '2020-07-03T01:55:17.198Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f1a41acebfea60017126ba3'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f1997e2896f200017a5833d',\n", + " 'createdDate': '2020-07-24T02:04:28.122Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('5f237addcfc86b00176cc139'),\n", + " 'checkedIn': True,\n", + " 'userId': '5e30ee300b9d2300177d3b3a',\n", + " 'eventId': '5f22d262cfc86b00176cc134',\n", + " 'createdDate': '2020-07-31T01:58:53.529Z',\n", + " '__v': 0},\n", + " {'_id': ObjectId('633b9a74d98663001f8b5c47'),\n", + " 'checkedIn': True,\n", + " 'userId': '633b9a74d98663001f8b5c46',\n", + " 'eventId': '62dc8b87d98663001f8b5a6c',\n", + " 'createdDate': datetime.datetime(2022, 10, 4, 2, 29, 8, 559000),\n", + " '__v': 0},\n", + " {'_id': ObjectId('64ae5c1883e28253b58b5bdc'),\n", + " 'checkedIn': True,\n", + " 'userId': '633b9a74d98663001f8b5c46',\n", + " 'eventId': '64ae3a5cf95a2e001f630b5f',\n", + " 'createdDate': datetime.datetime(2023, 7, 12, 7, 54, 0, 672000),\n", + " '__v': 0},\n", + " {'_id': ObjectId('64ae5ed9f95a2e001f630b60'),\n", + " 'checkedIn': True,\n", + " 'userId': '633b9a74d98663001f8b5c46',\n", + " 'eventId': 'qqqqqq',\n", + " 'createdDate': datetime.datetime(2023, 7, 12, 8, 5, 45, 22000),\n", + " '__v': 0},\n", + " {'_id': ObjectId('6549cbb97dfe210021abb385'),\n", + " 'checkedIn': True,\n", + " 'userId': '633b9a74d98663001f8b5c46',\n", + " 'eventId': '65497e0e7dfe210021abb379',\n", + " 'createdDate': datetime.datetime(2023, 11, 7, 5, 31, 37, 125000),\n", + " '__v': 0}]" + ] + }, + "execution_count": 87, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(checkins)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "ecd00e41-1f4c-4aec-97a9-bf64a92974a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "UpdateResult({'n': 4, 'electionId': ObjectId('7fffffff0000000000000222'), 'opTime': {'ts': Timestamp(1754280535, 23), 't': 546}, 'nModified': 4, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1754280535, 23), 'signature': {'hash': b'\\x00n\\x88\\xed\\xcc\\xb1\\x7f\\x99\\xf6@l\\xd4\\xa2N\\xb3\\xfa\\x9b\\xcd\\xec\\x7f', 'keyId': 7488330297243598876}}, 'operationTime': Timestamp(1754280535, 23), 'updatedExisting': True}, acknowledged=True)" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col.update_many({'userId': '633b9a74d98663001f8b5c46'}, {'$set': {'userId': '5e965e554e2fc70017aa3970'}})" ] }, { "cell_type": "code", "execution_count": null, - "id": "75612bd3-2e6a-4198-9b1c-5732bd7451c2", + "id": "2db985ba-fab9-4820-9d67-ed42f8f1ba03", "metadata": {}, "outputs": [], "source": [] diff --git a/backend/scripts/python/env/README.md b/backend/scripts/python/env/README.md new file mode 100644 index 000000000..83fb65def --- /dev/null +++ b/backend/scripts/python/env/README.md @@ -0,0 +1,69 @@ +# Python Virtual Environment + +Welcome to the `scripts\python\env` folder of the VRMS backend. This folder contains a Jupyter notebook, dependencies for setting up the environment, and a `.gitignore` file for managing which files should be ignored by version control. + +## Prerequisites + +Before you begin, make sure you have Python installed on your machine. 
If you don't have Python installed yet, you can download and install it from the official website: + +[Download Python](https://www.python.org/downloads/) + +Once you have Python installed, you're ready to set up the virtual environment and install the necessary dependencies as described below. + +## Requirements + +Before you can run the Jupyter notebook, you will need to set up a Python virtual environment and install the required dependencies. Here's how you can do that: + +### 1. Set Up a Python Virtual Environment + +From within this directory (`backend/scripts/python/env`), run the following command to create a virtual environment: + +``` +python -m venv . +``` + +This will create a virtual environment within the current directory. + +### 2. Activate the Virtual Environment + +Once the virtual environment is created, you'll need to activate it. + +- On **Windows**, run: + + ``` + .\Scripts\activate + ``` + +- On **macOS/Linux**, run: + + ``` + source bin/activate + ``` + +### 3. Install Dependencies + +With the virtual environment activated, you can now install the dependencies listed in `requirements.txt`: + +``` +pip install -r requirements.txt +``` + +### 4. Launch Jupyter Notebook + +After installing the required dependencies, you can start the Jupyter notebook by running the following command: + +``` +jupyter notebook +``` + +This will open the Jupyter notebook interface in your web browser, where you can navigate to and run the script. + +## .gitignore + +The `.gitignore` file in this directory is set to ignore all files, including the virtual environment, so that unnecessary files don't get committed to version control. If you wish to track changes to new files added to this directory, you will need to use a command like: + +``` +git add -f .\backend\scripts\python\env\your-file.file +``` + +where -f forces git to add and begin tracking that file. 
\ No newline at end of file diff --git a/backend/scripts/python/env/requirements.txt b/backend/scripts/python/env/requirements.txt new file mode 100644 index 000000000..f3c9e906d --- /dev/null +++ b/backend/scripts/python/env/requirements.txt @@ -0,0 +1,104 @@ +anyio==4.9.0 +argon2-cffi==25.1.0 +argon2-cffi-bindings==25.1.0 +arrow==1.3.0 +asttokens==3.0.0 +async-lru==2.0.5 +attrs==25.3.0 +babel==2.17.0 +beautifulsoup4==4.13.4 +bleach==6.2.0 +certifi==2025.7.14 +cffi==1.17.1 +charset-normalizer==3.4.2 +colorama==0.4.6 +comm==0.2.3 +debugpy==1.8.15 +decorator==5.2.1 +defusedxml==0.7.1 +dnspython==2.7.0 +dotenv==0.9.9 +executing==2.2.0 +fastjsonschema==2.21.1 +fqdn==1.5.1 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +idna==3.10 +ipykernel==6.30.0 +ipython==9.4.0 +ipython_pygments_lexers==1.1.1 +ipywidgets==8.1.7 +isoduration==20.11.0 +jedi==0.19.2 +Jinja2==3.1.6 +json5==0.12.0 +jsonpointer==3.0.0 +jsonschema==4.25.0 +jsonschema-specifications==2025.4.1 +jupyter==1.1.1 +jupyter-console==6.6.3 +jupyter-events==0.12.0 +jupyter-lsp==2.2.6 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyter_server==2.16.0 +jupyter_server_terminals==0.5.3 +jupyterlab==4.4.5 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.27.3 +jupyterlab_widgets==3.0.15 +lark==1.2.2 +MarkupSafe==3.0.2 +matplotlib-inline==0.1.7 +mistune==3.1.3 +nbclient==0.10.2 +nbconvert==7.16.6 +nbformat==5.10.4 +nest-asyncio==1.6.0 +notebook==7.4.4 +notebook_shim==0.2.4 +overrides==7.7.0 +packaging==25.0 +pandocfilters==1.5.1 +parso==0.8.4 +platformdirs==4.3.8 +prometheus_client==0.22.1 +prompt_toolkit==3.0.51 +psutil==7.0.0 +pure_eval==0.2.3 +pycparser==2.22 +Pygments==2.19.2 +pymongo==4.13.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-json-logger==3.3.0 +pywin32==311 +pywinpty==2.0.15 +PyYAML==6.0.2 +pyzmq==27.0.0 +referencing==0.36.2 +requests==2.32.4 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rfc3987-syntax==1.1.0 +rpds-py==0.26.0 +Send2Trash==1.8.3 +setuptools==80.9.0 +six==1.17.0 
+sniffio==1.3.1 +soupsieve==2.7 +stack-data==0.6.3 +terminado==0.18.1 +tinycss2==1.4.0 +tornado==6.5.1 +traitlets==5.14.3 +types-python-dateutil==2.9.0.20250708 +typing_extensions==4.14.1 +uri-template==1.3.0 +urllib3==2.5.0 +wcwidth==0.2.13 +webcolors==24.11.1 +webencodings==0.5.1 +websocket-client==1.8.0 +widgetsnbextension==4.0.14 From 53a294fcccf49b0bfc17e0b404e33e7be6bbf517 Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Mon, 4 Aug 2025 19:48:04 -0700 Subject: [PATCH 11/12] Use sets to keep array fields from getting duplicate entries --- backend/scripts/emailCleaner.mjs | 33 - .../python/env/Duplicate Removal.ipynb | 5 +- backend/scripts/test-data.json | 1116 ----------------- 3 files changed, 3 insertions(+), 1151 deletions(-) delete mode 100644 backend/scripts/emailCleaner.mjs delete mode 100644 backend/scripts/test-data.json diff --git a/backend/scripts/emailCleaner.mjs b/backend/scripts/emailCleaner.mjs deleted file mode 100644 index 609572262..000000000 --- a/backend/scripts/emailCleaner.mjs +++ /dev/null @@ -1,33 +0,0 @@ -import jsonData from './test-data.json' assert {type: 'json'}; - -const emailMap = {}; - -jsonData.forEach(user => { - const lowercaseEmail = user.email.toLowerCase(); - if (!emailMap[lowercaseEmail]) { - emailMap[lowercaseEmail] = user; - } else { - const existingUser = emailMap[lowercaseEmail]; - existingUser.skillsToMatch.push(...user.skillsToMatch); - existingUser.projects.push(...user.projects); - existingUser.managedProjects.push(...user.managedProjects); - existingUser.textingOk = existingUser.textingOk || user.textingOk; - existingUser.isActive = existingUser.isActive || user.isActive; - existingUser.newMember = existingUser.newMember || user.newMember; - existingUser.currentRole = existingUser !== user ? existingUser.currentRole : user.currentRole; - existingUser.desiredRole = existingUser !== user ? 
existingUser.desiredRole : user.desiredRole; - - if (existingUser.accessLevel === 'admin' || user.accessLevel === 'admin') { - existingUser.accessLevel = 'admin'; - } - // Preserving the older createdDate, firstAttended and modifying the email - if (new Date(user.createdDate) < new Date(existingUser.createdDate)) { - existingUser.createdDate = user.createdDate; - existingUser.firstAttended = user.firstAttended; - existingUser.email = `${user.email.toLowerCase()}_${user._id}` - } - } - // Always lowercase email - user.email = lowercaseEmail; -}); - diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb index b76a6810c..b8468dba6 100644 --- a/backend/scripts/python/env/Duplicate Removal.ipynb +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -217,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "id": "b2e9128e-b338-4456-ae0d-8772621536dd", "metadata": {}, "outputs": [], @@ -230,7 +230,8 @@ " # For any list fields, combine them\n", " list_fields = ['skillsToMatch', 'projects', 'managedProjects']\n", " for field in list_fields:\n", - " canonical_user[field] += duplicate_user[field]\n", + " all_array_values = set(canonical_user.get(field, []) + duplicate_user.get(field, []))\n", + " canonical_user[field] = list(all_array_values)\n", "\n", " # For boolean fields, set to true if either is true\n", " bool_fields = ['textingOk', 'isActive', 'newMember']\n", diff --git a/backend/scripts/test-data.json b/backend/scripts/test-data.json deleted file mode 100644 index 5519d4d72..000000000 --- a/backend/scripts/test-data.json +++ /dev/null @@ -1,1116 +0,0 @@ -[ - { - "name": { - "firstName": "John", - "lastName": "Doe" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": ["60d13ff818317127044e7f08"], - "isActive": true, - "_id": "5f1d23fe316d2f00345ef06a", - "email": "johnDoe@civic.org", - "currentRole": 
"most common human name", - "desiredRole": "most desired human name", - "newMember": false, - "firstAttended": "NOV 2015", - "createdDate": "2020-01-14T02:14:22.407Z", - "__v": 0, - "attendanceReason": "Civic Engagement", - "currentProject": "Undebate" - }, - { - "name": { - "firstName": "Iggy", - "lastName": "Stoic" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1d2490316d2f00172ef072", - "email": "iggystoic@gmail.com", - "currentRole": "Tech Consulting", - "desiredRole": "Technical Product", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-14T02:16:48.597Z", - "__v": 0 - }, - { - "name": { - "firstName": "TEST", - "lastName": "PERSON" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1e76c20ab172001790f806", - "email": "TEST@GMAIL.com", - "currentRole": "Test Developer", - "desiredRole": "Senior Test Developer", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-15T02:19:46.780Z", - "__v": 0 - }, - { - "name": { - "firstName": "Test", - "lastName": "Person" - }, - "accessLevel": "admin", - "skillsToMatch": ["Testing", "Quality Management"], - "projects": [], - "textingOk": false, - "managedProjects": ["60d13ff818317127044e7f09"], - "isActive": true, - "_id": "5f4bfbc8e9f4f121e8c1eb42", - "email": "test@gmail.com", - "currentRole": "Test Student", - "desiredRole": "Software Developer", - "newMember": false, - "attendanceReason": "Environment", - "currentProject": "VRMS", - "firstAttended": "JAN 2019", - "createdDate": "2024-05-10T03:37:30.363Z" - }, - { - "name": { - "firstName": "John", - "lastName": "Atkins" - }, - "accessLevel": "user", - "skillsToMatch": ["acting"], - "projects": ["Mr. 
Bean"], - "textingOk": true, - "managedProjects": [], - "isActive": true, - "_id": "5e1e74030ab172001790f7ea", - "email": "JOHN.J.ATKINS@GMAIL.COM", - "currentRole": "Mr. Bean", - "desiredRole": "Johnny English", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-15T02:08:03.024Z", - "__v": 0 - }, - { - "name": { - "firstName": "Alex", - "lastName": "Chu" - }, - "accessLevel": "admin", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [ - "5edeac78ce228b001778facd", - "60d13ff818317127044e7f08" - ], - "isActive": true, - "_id": "5f164d2839cb9c001736f4cf", - "email": "chu.alex@gmail.com", - "currentRole": "Software engineer", - "desiredRole": "Software engineer", - "newMember": false, - "firstAttended": "JUL 2020", - "createdDate": "2020-07-21T02:04:24.241Z", - "__v": 0 - }, - { - "name": { - "firstName": "Boston", - "lastName": "Langford" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1d27d9316d2f00172ef090", - "email": "boston@snl.com", - "currentRole": "PM", - "desiredRole": "PM", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-14T02:30:49.449Z", - "__v": 0 - }, - { - "name": { - "firstName": "Cole", - "lastName": "Bennett" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1d2323316d2f00172ef064", - "email": "bennett.cole@gmail.com", - "currentRole": "idk", - "desiredRole": "idk", - "newMember": true, - "firstAttended": "APR 2020", - "createdDate": "2020-01-14T02:10:43.978Z", - "__v": 0 - }, - { - "name": { - "firstName": "Cole", - "lastName": "Bennett" - }, - "accessLevel": "admin", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1d2081316d2f00172ef052", - "email": "BENNETT.COLE@GMAIL.COM", - 
"currentRole": "Dir of MUSIC", - "desiredRole": "None", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-14T01:59:29.273Z", - "__v": 0, - "attendanceReason": "Social Justice/Equity" - }, - { - "name": { - "firstName": "John", - "lastName": "Atkins" - }, - "accessLevel": "user", - "skillsToMatch": ["comic"], - "projects": ["Johnny English"], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1d2137316d2f00172ef056", - "email": "john.j.atkins@gmail.com", - "currentRole": "High School Student", - "desiredRole": "Software/Research", - "newMember": false, - "firstAttended": "DEC 2019", - "createdDate": "2020-01-14T02:02:31.455Z", - "__v": 0, - "attendanceReason": "Open Data", - "currentProject": "New Schools Today" - }, - { - "name": { - "firstName": "Greg", - "lastName": "Smith" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5eec3e16411acc00174869a3", - "email": "gregpawpetrol@gmail.com", - "currentRole": "Data Consultant", - "desiredRole": "Data Scientist", - "newMember": false, - "firstAttended": "NOV 2019", - "createdDate": "2020-06-19T04:24:54.887Z", - "__v": 0 - }, - { - "name": { - "firstName": "Ryan", - "lastName": "Gosling" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e30e8a80b9d2300177d3b20", - "email": "rgosling@gmail.com", - "currentRole": "Data Scientist", - "desiredRole": "Data Scientist", - "newMember": false, - "firstAttended": "OCT 2019", - "createdDate": "2020-01-29T02:06:32.192Z", - "__v": 0, - "attendanceReason": "Open Data", - "currentProject": "311 Data" - }, - { - "name": { - "firstName": "Jared", - "lastName": "Maxwell" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": 
"5e38d3cf8d52770017ae8a91", - "email": "jmax812@max.com", - "currentRole": "freelance dev", - "desiredRole": "employed def", - "newMember": false, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-04T02:15:43.745Z", - "__v": 0, - "attendanceReason": "Civic Engagement", - "currentProject": "VRMS" - }, - { - "name": { - "firstName": "Dexter", - "lastName": "Robinson" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5f28c25206f21000177e690a", - "email": "Dexterlab@gmail.com", - "currentRole": "Software Developer", - "desiredRole": "Frontend Developer", - "newMember": true, - "firstAttended": "AUG 2020", - "createdDate": "2020-08-04T02:05:06.309Z", - "__v": 0 - }, - { - "name": { - "firstName": "Sharon", - "lastName": "Wesley" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e4ca746b73a2a001732f49a", - "email": "sharonwesleycodes@gmail.com", - "currentRole": "Student", - "desiredRole": "Programmer", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-19T03:11:02.863Z", - "__v": 0 - }, - { - "name": { - "firstName": "Mr", - "lastName": "Awesome" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e421328ccd154001772603b", - "email": "pseudo.randsome@gmail.clm", - "currentRole": "Engineer", - "desiredRole": "God", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-11T02:36:24.529Z", - "__v": 0 - }, - { - "name": { - "firstName": "chichi", - "lastName": "hughes" - }, - "accessLevel": "admin", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5ea74d2720cec100179465cf", - "email": "chichi.hughes@gmail.com", - "currentRole": "UX/UI Designer", - "desiredRole": 
"UX/UI Designer", - "newMember": false, - "firstAttended": "APR 2020", - "createdDate": "2020-04-27T21:22:47.465Z", - "__v": 0, - "attendanceReason": "Open Data" - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d777c20707783dc86d6101", - "email": "asim.radhat@gmail.comdfm34", - "currentRole": "zxc", - "desiredRole": "zxc", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:53:54.935Z", - "__v": 0 - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d95ead26878d41f48aa15f", - "email": "asim.rhalskdfm10923knzdst@gmail.com", - "currentRole": "asldkm", - "desiredRole": "sdlkfm", - "newMember": true, - "createdDate": "2021-06-28T05:31:25.482Z", - "__v": 0 - }, - { - "name": { - "firstName": "Glen", - "lastName": "Steven" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "61523ca806ddd96310eb3e49", - "email": "kpop51367@bts123.net", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": true, - "firstAttended": "JAN 2021", - "createdDate": "2021-01-27T21:50:32.255Z", - "__v": 0 - }, - { - "name": { - "firstName": "Glen", - "lastName": "Steven" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": true, - "managedProjects": [], - "isActive": true, - "_id": "61523fa80e0025656e23371d", - "email": "KPOP51367@BTS123.net", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": true, - "firstAttended": "SEP 2021", - "createdDate": "2021-09-27T22:03:20.465Z", - "__v": 0 - }, - { - "name": { - "firstName": "Arthur", - "lastName": "Doyle" - }, - "accessLevel": "user", - 
"skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "615cbd3a18c36c001e7b03b1", - "email": "Doyle@atemybread.com", - "currentRole": "baker", - "desiredRole": "better baker", - "newMember": true, - "firstAttended": "OCT 2021", - "createdDate": "2021-10-05T21:01:46.941Z", - "__v": 0 - }, - { - "name": { - "firstName": "rare", - "lastName": "person" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "61947ec1ef53983193f54ae7", - "email": "Rareisreal@fm.com", - "currentRole": "aerga", - "desiredRole": "arga", - "newMember": true, - "firstAttended": "NOV 2021", - "createdDate": "2021-11-17T04:02:09.726Z", - "__v": 0 - }, - { - "name": { - "firstName": "Glen", - "lastName": "Steven" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "6208327f7da35707d018e13c", - "email": "Stevedude73842@pet.com", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": false, - "firstAttended": "FEB 2022", - "createdDate": "2022-02-12T22:19:43.301Z", - "__v": 0 - }, - { - "name": { - "firstName": "Shaun", - "lastName": "Murphy" - }, - "accessLevel": "admin", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [ - "5edeac78ce228b001778facd", - "5edea961ce228b001778faca", - "620859fa46807266c6cbcf20", - "6205ceb5aaf98b0021b3c204", - "619af6c86a8afa609cd5c419" - ], - "isActive": true, - "_id": "620840ad46807266c6cbcf1e", - "email": "smurf@gmail.com", - "currentRole": "dev", - "desiredRole": "dev", - "newMember": false, - "firstAttended": "FEB 2022", - "createdDate": "2022-02-12T23:20:13.331Z", - "__v": 0 - }, - { - "name": { - "firstName": "Tommy", - "lastName": "Smith" - }, - "accessLevel": "admin", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": 
true, - "_id": "633b9a74d98663001f8b5c46", - "email": "tommy@thatsfabulous.com", - "currentRole": "Supreme Leader", - "desiredRole": "Front end developer", - "newMember": false, - "firstAttended": "OCT 2022", - "createdDate": "2022-10-04T02:29:08.363Z", - "__v": 0 - }, - { - "name": { - "firstName": "Testing", - "lastName": "test12" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "645986720659c772705c6bcb", - "email": "testrn@gmail.com", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": true, - "firstAttended": "MAY 2023", - "createdDate": "2023-05-08T23:32:02.575Z", - "__v": 0 - }, - { - "name": { - "firstName": "Enola", - "lastName": "Holmes" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1e764c0ab172001790f804", - "email": "enola@gmail.com", - "currentRole": "front-end web dev", - "desiredRole": "unsure", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-15T02:17:48.287Z", - "__v": 0, - "attendanceReason": "Open Data" - }, - { - "name": { - "firstName": "Phoebe", - "lastName": "Phoenix" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e54818892b678001759b45a", - "email": "phoenixcodes@gmail.com", - "currentRole": "Freelance Fullstack JS Dev", - "desiredRole": "Fullstack JS Dev", - "newMember": false, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-25T02:08:08.775Z", - "__v": 0, - "attendanceReason": "Social Justice/Equity", - "currentProject": "VRMS" - }, - { - "name": { - "firstName": "Abby", - "lastName": "Jordan" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5f33508b0d62580017d78228", - "email": 
"hippyjordan@gmail.com", - "currentRole": "Product Manager", - "desiredRole": "Product Manager", - "newMember": false, - "firstAttended": "AUG 2020", - "createdDate": "2020-08-12T02:14:35.725Z", - "__v": 0 - }, - { - "name": { - "firstName": "Susan", - "lastName": "Lee" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e8bd28080fe360017694654", - "email": "xoxosusy@gmail.com", - "currentRole": "Student", - "desiredRole": "Software Engineer", - "newMember": false, - "firstAttended": "NOV 2019", - "createdDate": "2020-04-07T01:08:16.639Z", - "__v": 0 - }, - { - "name": { - "firstName": "Calvin ", - "lastName": "Klien" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5ed84b1bb68bcc00179b27de", - "email": "info@ck.com", - "currentRole": "PHP Developer", - "desiredRole": "CEO", - "newMember": true, - "firstAttended": "JUN 2020", - "createdDate": "2020-06-04T01:15:07.602Z", - "__v": 0 - }, - { - "name": { - "firstName": "Sophia", - "lastName": "Yang" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e1e74390ab172001790f7ec", - "email": "slice212@gmail.com", - "currentRole": "applying", - "desiredRole": "Data Science", - "newMember": false, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-15T02:08:57.796Z", - "__v": 0, - "attendanceReason": "Homelessness", - "currentProject": "Host Home" - }, - { - "name": { - "firstName": "Aston", - "lastName": "Martin" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5ebdfa8005963b0017c6dd5c", - "email": "rickandmorty@gmail.com", - "currentRole": "Data Analyst/Scientist", - "desiredRole": "Data Analyst/Scientist", - "newMember": false, - 
"firstAttended": "MAY 2020", - "createdDate": "2020-05-15T02:12:16.545Z", - "__v": 0 - }, - { - "name": { - "firstName": "Selena", - "lastName": "Gomez" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e27ae4b4530cd0017eee422", - "email": "selena.gomez@gmail.com", - "currentRole": "SW Engineer", - "desiredRole": "Developer", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-01-22T02:07:07.734Z", - "__v": 0 - }, - { - "name": { - "firstName": "Amrit", - "lastName": "Mann" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e8bd1aa80fe36001769464d", - "email": "amritmann@gmail.com", - "currentRole": "Dev", - "desiredRole": "Dev", - "newMember": false, - "firstAttended": "APR 2020", - "createdDate": "2020-04-07T01:04:42.306Z", - "__v": 0, - "attendanceReason": "Open Data" - }, - { - "name": { - "firstName": "Jennifer", - "lastName": "Winget" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e4364fdef67e100175c1eff", - "email": "jenny@valorie.com", - "currentRole": "Civic Engagement", - "desiredRole": "Dev Ops", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-12T02:37:49.626Z", - "__v": 0 - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d7772c0707783dc86d60ff", - "email": "6k1.nyarly636@gmail.com", - "currentRole": "sdfasd", - "desiredRole": "asdasd", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:51:24.004Z", - "__v": 0 - }, - { - "name": { - "firstName": "Glen", - "lastName": "Clark" - }, - "accessLevel": "user", - 
"skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "61440e3218c36c001e7b03b0", - "email": "xoxoclark@debate.com", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": true, - "firstAttended": "SEP 2021", - "createdDate": "2021-09-17T03:40:34.500Z", - "__v": 0 - }, - { - "name": { - "firstName": "Glen", - "lastName": "Clark" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "6157d8d979effa437122d670", - "email": "XOXOclark@debate.com", - "currentRole": "Test", - "desiredRole": "Test", - "newMember": true, - "firstAttended": "OCT 2021", - "createdDate": "2021-10-02T03:58:17.700Z", - "__v": 0 - }, - { - "name": { - "firstName": "Larry", - "lastName": "Page" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e435ce7ef67e100175c1ee3", - "email": "Larry@LarryPage.com", - "currentRole": "Student", - "desiredRole": "Python Developer", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-12T02:03:19.149Z", - "__v": 0, - "attendanceReason": "Homelessness", - "currentProject": "Food Oasis" - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d7765b7eec843150f7cd7d", - "email": "654k1.nyarly636@gmail.com", - "currentRole": "sdfasd", - "desiredRole": "asdasd", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:47:55.487Z", - "__v": 0 - }, - { - "name": { - "firstName": "Shark", - "lastName": "User" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "6101db234815993498437084", - "email": 
"WE@sharktank.com", - "currentRole": "af", - "desiredRole": "argfg", - "newMember": true, - "firstAttended": "JUL 2021", - "createdDate": "2021-07-28T22:33:07.120Z", - "__v": 0 - }, - { - "name": { - "firstName": "JASON", - "lastName": "YANG" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e38d1568d52770017ae8a86", - "email": "JASC68.JYANG@GMAIL.COM", - "currentRole": "DATA SCIENTIST", - "desiredRole": "DATA SCIENTIST", - "newMember": false, - "firstAttended": "SEP 2019", - "createdDate": "2020-02-04T02:05:10.999Z", - "__v": 0 - }, - { - "name": { - "firstName": "Jonathon", - "lastName": "Dooley" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5f0540b24f31cf00174d7538", - "email": "DooleyJM0902@student.laccd.edu", - "currentRole": "Student", - "desiredRole": "Developer", - "newMember": false, - "firstAttended": "JUL 2020", - "createdDate": "2020-07-08T03:42:42.621Z", - "__v": 0 - }, - { - "name": { - "firstName": "Jason", - "lastName": "Yang" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e66e6fcbe3e0b001761a814", - "email": "jasc68.jyang@gmail.com", - "currentRole": "data scientist", - "desiredRole": "data scientist", - "newMember": false, - "firstAttended": "SEP 2019", - "createdDate": "2020-03-10T01:01:48.651Z", - "__v": 0, - "attendanceReason": "Open Data", - "currentProject": "None" - }, - { - "name": { - "firstName": "Julia", - "lastName": "Fong" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [ - "5ec326c7a973810017c0de0c", - "619af6c86a8afa609cd5c419" - ], - "isActive": true, - "_id": "5e435911ef67e100175c1ecb", - "email": "Juliamzfong@gmail.com", - "currentRole": "fellow", - "desiredRole": 
"front-end developer", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-12T01:46:57.788Z", - "__v": 0 - }, - { - "name": { - "firstName": "RYAN", - "lastName": "COLLINS" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e38d10a8d52770017ae8a81", - "email": "RMCOLLINS95@GMAIL.COM", - "currentRole": "NONE", - "desiredRole": "DATA SCIENTIST", - "newMember": true, - "firstAttended": "JAN 2020", - "createdDate": "2020-02-04T02:03:54.970Z", - "__v": 0 - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d775d47eec843150f7cd7b", - "email": "6541.ny636fm@gmail.com", - "currentRole": "sdfasd", - "desiredRole": "asdasd", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:45:40.564Z", - "__v": 0 - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d76d8a7eec843150f7cd6c", - "email": "olafake908@gmail.com", - "currentRole": "asd", - "desiredRole": "asd", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:10:18.494Z", - "__v": 0 - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d775727eec843150f7cd76", - "email": "6541.ny619@gmail.com", - "currentRole": "sdfasd", - "desiredRole": "asdasd", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T18:44:02.536Z", - "__v": 0 - }, - { - "name": { - "firstName": "Bojack", - "lastName": "Horseman" - }, - "accessLevel": "user", - 
"skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "64163f87ffac46b48a4abc87", - "email": "bojack@gmail.com", - "currentRole": "horse", - "desiredRole": "man", - "newMember": true, - "firstAttended": "MAR 2023", - "createdDate": "2023-03-18T22:47:35.138Z", - "__v": 0 - }, - { - "name": { - "firstName": "Chris", - "lastName": "Schmitz" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "5e30ee120b9d2300177d3b38", - "email": "Christopher.A.Schmitz@gmail.com", - "currentRole": "Mobile Engineering Manager", - "desiredRole": "Mobile Engineering Manager", - "newMember": false, - "firstAttended": "MAY 2017", - "createdDate": "2020-01-29T02:29:38.664Z", - "__v": 0, - "attendanceReason": "Civic Engagement", - "currentProject": "None" - }, - { - "name": { - "firstName": "asim", - "lastName": "rahman" - }, - "accessLevel": "user", - "skillsToMatch": [], - "projects": [], - "textingOk": false, - "managedProjects": [], - "isActive": true, - "_id": "60d77e950707783dc86d6103", - "email": "234asim.radhat@gmail.com34a", - "currentRole": "sdf", - "desiredRole": "sdf", - "newMember": true, - "firstAttended": "JUN 2021", - "createdDate": "2021-06-26T19:23:01.866Z", - "__v": 0 - } - ] \ No newline at end of file From c528c6987d420f0a18b8f935ab03868353f656c0 Mon Sep 17 00:00:00 2001 From: Danny Prikazsky Date: Mon, 4 Aug 2025 20:07:08 -0700 Subject: [PATCH 12/12] Remove mongo url from code, and clean up notes --- .../python/env/Duplicate Removal.ipynb | 323 +++--------------- 1 file changed, 41 insertions(+), 282 deletions(-) diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb index b8468dba6..847f9e455 100644 --- a/backend/scripts/python/env/Duplicate Removal.ipynb +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", 
- "execution_count": 52, + "execution_count": 2, "id": "bf6a5708-01b8-4439-b085-996a0b9309df", "metadata": {}, "outputs": [], @@ -28,7 +28,8 @@ "from functools import reduce\n", "\n", "load_dotenv()\n", - "custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")" + "custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")\n", + "DATABASE_URL = os.getenv(\"DATABASE_URL\")" ] }, { @@ -189,7 +190,7 @@ "source": [ "## Removing duplicate users\n", "\n", - "These functions will determine which user for a set of users with the same email has the oldest `createdDate` and then merge duplicate users into the oldest user document for that email." + "The following cells will order the userIds in the duped_emails dict from oldest to newest, merge the information from newer user documents into the oldest one, update the original, and then delete the duplicates. They will also keep track of the duplicate user documents' userIds so that we can later change any checkins to have the userId of the original user document." 
] }, { @@ -207,11 +208,12 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 8, "id": "6270f2b8-aa8d-446f-8ee8-7dcbac3a4cc7", "metadata": {}, "outputs": [], "source": [ + "# This will be used later for updating checkins\n", "ids_to_replace = {}" ] }, @@ -256,6 +258,18 @@ " return older_id" ] }, + { + "cell_type": "code", + "execution_count": 57, + "id": "78ccbbd6-7fea-4657-bf9a-850bd9e68505", + "metadata": {}, + "outputs": [], + "source": [ + "def delete_user(user_id):\n", + " r = requests.delete(USER_API_URL + '/' + user_id, headers=HEADERS)\n", + " print(r.content)" + ] + }, { "cell_type": "code", "execution_count": 79, @@ -265,36 +279,36 @@ "source": [ "for lowercase_email in duped_emails.keys():\n", " reduce(merge_users, duped_emails[lowercase_email])\n", - " #correct_user_id = duped_emails[lower_case_email][0]\n", - " #update_user(correct_user_id, user_dict[correct_user_id])" + " correct_user_id = duped_emails[lowercase_email][0]\n", + " dupes = duped_emails[lowercase_email][1:]\n", + " update_user(correct_user_id, user_dict[correct_user_id])\n", + " for dupe in dupes:\n", + " delete_user(dupe)" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "5d0c235b-b593-40e8-8190-745915529349", + "cell_type": "markdown", + "id": "d57d8cac-0873-42ee-86ca-aa61e8be119f", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## Correcting Checkins\n", + "\n", + "With the following cells, we use pymongo because our API does not expose any endpoints for editing checkins. For each duplicate_id, we will find all checkins with that userId and replace it with the id of the original user document." 
+ ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 9, "id": "2d6d014b-71c4-44a0-8247-8d10f87a2f1a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'633b9a74d98663001f8b5c46': '5e965e554e2fc70017aa3970',\n", - " '5e66e6fcbe3e0b001761a814': '5e38d1568d52770017ae8a86',\n", - " '5e30ee300b9d2300177d3b3a': '5e27b1e54530cd0017eee431',\n", - " '5e7965101e29ad00179399bb': '5e7024f88e1bee00178aa1e0',\n", - " '6871afe1e6bf590aede8f8da': '678f122e4c6f61002a1e5e68',\n", - " '6871b00ee6bf590aede8f8db': '678f122e4c6f61002a1e5e68'}" + "{}" ] }, - "execution_count": 80, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -305,30 +319,18 @@ }, { "cell_type": "code", - "execution_count": 57, - "id": "78ccbbd6-7fea-4657-bf9a-850bd9e68505", - "metadata": {}, - "outputs": [], - "source": [ - "def delete_user(user_id):\n", - " r = requests.delete(USER_API_URL + '/' + user_id, headers=HEADERS)\n", - " print(r.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, + "execution_count": 3, "id": "5f4ec365-ab7c-45fe-bfbe-b35cdbb0ec94", "metadata": {}, "outputs": [], "source": [ "from pymongo import MongoClient\n", - "client = MongoClient(\"mongodb+srv://editor:557Ith3jq7ap2mnO@cluster0.5buwz.mongodb.net/vrms-test?retryWrites=true&w=majority\")" + "client = MongoClient(DATABASE_URL)" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 4, "id": "8556cf83-a412-4ee0-bfde-0447b63d6ac4", "metadata": {}, "outputs": [], @@ -338,7 +340,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 5, "id": "87d77a37-c404-4aa9-9b0a-7db74039db17", "metadata": {}, "outputs": [], @@ -346,252 +348,6 @@ "col = db['checkins']" ] }, - { - "cell_type": "code", - "execution_count": 86, - "id": "bcb71f5b-21d7-4929-a404-9771ba34cdfa", - "metadata": {}, - "outputs": [], - "source": [ - "checkins = col.find({'userId': {'$in': list(ids_to_replace.keys())}})" - ] - }, - { - "cell_type": "code", - 
"execution_count": 87, - "id": "314dd491-6a2e-442e-8dd2-08ba9dc08f0a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'_id': ObjectId('5f3f15e9e2779b0017bc88d8'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f3e81e2e2779b0017bc88d7',\n", - " 'createdDate': '2020-08-21T00:31:37.569Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e30ee300b9d2300177d3b3b'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e30daf244708e3726e15892',\n", - " 'createdDate': '2020-01-29T02:30:08.206Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e3a2a1360ee280017fee686'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e39c1143c5e1c83597507e6',\n", - " 'createdDate': '2020-02-05T02:36:03.953Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f2caef2be8892001782f57b'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f2c0ce284ecb10017ad1ae7',\n", - " 'createdDate': '2020-08-07T01:31:30.555Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e55da1df9819c00173fb2c2'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e55a3559f734cce1e7de80f',\n", - " 'createdDate': '2020-02-26T02:38:21.252Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e8d2bc2e48350001771acc6'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e8d1bb738f9b52492942030',\n", - " 'createdDate': '2020-04-08T01:41:22.376Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e4366e3ef67e100175c1f05'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e43532d10ea9f049c245b0c',\n", - " 'createdDate': '2020-02-12T02:45:55.105Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e6847beaee6af0017b3fa0c'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': 
'5e68317c20f87d5df925b954',\n", - " 'createdDate': '2020-03-11T02:06:54.380Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e7025438e1bee00178aa1e4'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e66e6fcbe3e0b001761a814',\n", - " 'eventId': '5e701ba16d144781e897ec6a',\n", - " 'createdDate': '2020-03-17T01:17:55.140Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e5f184803a60700179ba4f6'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e5ec5cb8cea701b148fceae',\n", - " 'createdDate': '2020-03-04T02:54:00.467Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e4c9f4ab73a2a001732f493'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e4c7985ec7f0743c0db510e',\n", - " 'createdDate': '2020-02-19T02:36:58.842Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e7965101e29ad00179399bc'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e7965101e29ad00179399bb',\n", - " 'eventId': '5e794aef824fb04480af6959',\n", - " 'createdDate': '2020-03-24T01:40:32.769Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e66e6fcbe3e0b001761a815'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e66e6fcbe3e0b001761a814',\n", - " 'eventId': '5e62b17b126c67427c5ce89f',\n", - " 'createdDate': '2020-03-10T01:01:48.845Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5e7ab614d84f53001763f366'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5e7aa5a0c01bb54cdd36065c',\n", - " 'createdDate': '2020-03-25T01:38:28.198Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ed70511aa37d600179e450d'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ed65b618e8b770017a9ce1e',\n", - " 'createdDate': '2020-06-03T02:04:01.552Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ec48f6d200f8b0017836d80'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ec42cb16b7a9a00172a553d',\n", - " 
'createdDate': '2020-05-20T02:01:17.031Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ed0874dca666400174b1e8c'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ecfc3e1d6cbd800171847da',\n", - " 'createdDate': '2020-05-29T03:53:49.927Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ecdd407cef75e0017cf30f0'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ecd20e179d6dc0017ac7db8',\n", - " 'createdDate': '2020-05-27T02:44:23.211Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ec7321329fb2d00172635ab'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ec68961f504e90017fb11ff',\n", - " 'createdDate': '2020-05-22T01:59:47.999Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ebb4dd187aa6d00176227a6'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ebb300c950796fde6cbe700',\n", - " 'createdDate': '2020-05-13T01:30:57.239Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ee03c6feab2e60017c44a28'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5edf95e1eab2e60017c44a1a',\n", - " 'createdDate': '2020-06-10T01:50:39.600Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5eec1d8f411acc001748698d'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5eeb73616e32610017f67765',\n", - " 'createdDate': '2020-06-19T02:06:07.142Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ebdf93405963b0017c6dd5a'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ebde5cf0b0d433916a4225f',\n", - " 'createdDate': '2020-05-15T02:06:44.938Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5ee2e28b1ebf83001752b3ff'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5ee238e1895aad0017ee61b5',\n", - " 'createdDate': '2020-06-12T02:03:55.863Z',\n", 
- " '__v': 0},\n", - " {'_id': ObjectId('5efbed24ff13eb00172d4408'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5efb4562e0e39f0017c07d65',\n", - " 'createdDate': '2020-07-01T01:55:48.023Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f07d61d4927d900172f2ffc'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f0722e13e45cb00176c72ca',\n", - " 'createdDate': '2020-07-10T02:44:45.363Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f0e65e204bebb0017213db0'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f0dba6204bebb0017213da1',\n", - " 'createdDate': '2020-07-15T02:11:46.994Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f11065fd4c79f0017d25c6e'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f105d62fdf82f0017e51aed',\n", - " 'createdDate': '2020-07-17T02:01:03.822Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f052ce54f31cf00174d7528'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f047fe24adb5d001718c29f',\n", - " 'createdDate': '2020-07-08T02:18:13.140Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5efe90057ecb370017c91b7b'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5efde8622bbd0c00177f5149',\n", - " 'createdDate': '2020-07-03T01:55:17.198Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f1a41acebfea60017126ba3'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f1997e2896f200017a5833d',\n", - " 'createdDate': '2020-07-24T02:04:28.122Z',\n", - " '__v': 0},\n", - " {'_id': ObjectId('5f237addcfc86b00176cc139'),\n", - " 'checkedIn': True,\n", - " 'userId': '5e30ee300b9d2300177d3b3a',\n", - " 'eventId': '5f22d262cfc86b00176cc134',\n", - " 'createdDate': '2020-07-31T01:58:53.529Z',\n", - " '__v': 0},\n", - " {'_id': 
ObjectId('633b9a74d98663001f8b5c47'),\n", - " 'checkedIn': True,\n", - " 'userId': '633b9a74d98663001f8b5c46',\n", - " 'eventId': '62dc8b87d98663001f8b5a6c',\n", - " 'createdDate': datetime.datetime(2022, 10, 4, 2, 29, 8, 559000),\n", - " '__v': 0},\n", - " {'_id': ObjectId('64ae5c1883e28253b58b5bdc'),\n", - " 'checkedIn': True,\n", - " 'userId': '633b9a74d98663001f8b5c46',\n", - " 'eventId': '64ae3a5cf95a2e001f630b5f',\n", - " 'createdDate': datetime.datetime(2023, 7, 12, 7, 54, 0, 672000),\n", - " '__v': 0},\n", - " {'_id': ObjectId('64ae5ed9f95a2e001f630b60'),\n", - " 'checkedIn': True,\n", - " 'userId': '633b9a74d98663001f8b5c46',\n", - " 'eventId': 'qqqqqq',\n", - " 'createdDate': datetime.datetime(2023, 7, 12, 8, 5, 45, 22000),\n", - " '__v': 0},\n", - " {'_id': ObjectId('6549cbb97dfe210021abb385'),\n", - " 'checkedIn': True,\n", - " 'userId': '633b9a74d98663001f8b5c46',\n", - " 'eventId': '65497e0e7dfe210021abb379',\n", - " 'createdDate': datetime.datetime(2023, 11, 7, 5, 31, 37, 125000),\n", - " '__v': 0}]" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(checkins)" - ] - }, { "cell_type": "code", "execution_count": 89, @@ -619,7 +375,10 @@ "id": "2db985ba-fab9-4820-9d67-ed42f8f1ba03", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for duplicate_user_id in ids_to_replace.keys():\n", + " col.update_many({'userId': duplicate_user_id}, {'$set': {'userId': ids_to_replace[duplicate_user_id]}})" + ] } ], "metadata": {