Update user_id in extractions collection (#145)

Michael-D-Johnson · Michael D. Johnson · max-zilla · web-flow · commit e84df1946a92 · 2020-12-01T17:16:02.000-06:00
* adding script to add user_id to documents in extractions collection if does not exist

* moving authorID == null declaration within loop. renaming findAuthor... to foundAuthor... to be more consistent with foundFile

* updating comments. adding update if job_id exists

* moving UpdateUserId.js to scripts/updates. Updated documentation for script and added contributer

Co-authored-by: Michael D. Johnson &lt;michaeldjohnson@control.local&gt;
Co-authored-by: Max Burnette &lt;mburnet88@gmail.com&gt;
Co-authored-by: Luigi Marini &lt;lmarini@users.noreply.github.com&gt;
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Added
 - Added support for Amplitude clickstream tracking. See Admin -> Customize to configure Amplitude apikey.
+- UpdateUserId.js to scripts/updates. This script adds user_id to each document in the extractions collection in MongoDB.
+  The user_id is taken from the author id in uploads.files if it exists; otherwise it is taken from the author id in the datasets collection.
 - Ability to submit multiple selected files within a dataset to an extractor.
 
 ### Fixed
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -21,6 +21,7 @@ Following is a list of contributors in alphabetical order:
 - Mario Felarca
 - Max Burnette
 - Michal Ondrejcek
+- Michael Johnson
 - Michelle Pitcel
 - Mike Bobak
 - Mike Lambert
diff --git a/scripts/updates/README.md b/scripts/updates/README.md
@@ -9,6 +9,8 @@ are named with the actual name of the update .js
 - update-avatar-url-to-https.js
 
 
-MISCALANOUS SCRIPTS:
+MISCELLANEOUS SCRIPTS:
 
-fix-counts.js: script to redo the counts in clowder
+- fix-counts.js: script to redo the counts in clowder
+
+- UpdateUserId.js: Adds user_id to documents in the extractions collection of the clowder MongoDB database. Uses the author id from uploads.files if it exists; otherwise it takes the author id from the datasets collection. Usage: mongo clowder UpdateUserId.js
diff --git a/scripts/updates/UpdateUserId.js b/scripts/updates/UpdateUserId.js
@@ -0,0 +1,46 @@
+/***
+This code iterates through each document where user_id does not exist and checks if file_id exists
+in uploads.files collection. If a file exists, it grabs the author._id for use as user_id. If it
+does not exist (or if author._id is null), it searches for the file_id in the datasets collection.
+If found, it gets the author._id for use as the user_id. If an author id is found and not null
+an update to the extractions collection is made by adding user_id: author._id. 
+***/
+
+db.extractions.find({"user_id":{$exists: 0}}).forEach(function(ext) {
+    let authorID = null;
+    // Looping through each extraction where user_id doesn't exist,
+    // if file_id found in uploads.files, get author._id
+    let foundFile = db.uploads.files.findOne({"_id": ext.file_id})
+    if (foundFile != null) {
+        authorID = foundFile.author._id;
+    }
+
+    // If file not found in uploads.files or if author._id doesn't exist,
+    // look up file_id in datasets, get author.id if found
+    if (foundFile == null || authorID == null) {
+        let foundAuthorInDatasets = db.datasets.findOne({"files": {$in: [ext.file_id]}});
+        if (foundAuthorInDatasets != null) {
+            authorID = foundAuthorInDatasets.author._id;
+        }
+    }
+    if (authorID != null) {
+        // If job_id exists update author._id for all documents with job_id,
+        // else update based on the current document id.
+        if (ext.job_id != null) {
+            // Update user_id for entry in extractions database
+            db.extractions.update({"job_id": ext.job_id}, {
+                "$set": {
+                    "user_id": authorID
+                }
+            });
+        }
+        else {
+            // Update user_id for entry in extractions database
+            db.extractions.update({"_id": ext._id}, {
+                "$set": {
+                    "user_id": authorID
+                }
+            });
+        }
+    }
+});