Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions backend/scripts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Jupyter runtime files
.jupyter/

# nbconvert output (converted/executed notebook copies)
*.nbconvert.ipynb
*.nbconvert/

# VSCode / PyCharm or other IDE junk (optional, but useful)
.vscode/
.idea/

# Python cache
__pycache__/
*.pyc
*.pyo
*.pyd
*.pkl

# OS files
.DS_Store
Thumbs.db

# Virtual environments
venv/

# Additional text files
adjustedreqs.txt
2 changes: 0 additions & 2 deletions backend/scripts/python/env/.gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion backend/scripts/python/env/Duplicate Removal.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
"version": "3.8.10"
}
},
"nbformat": 4,
Expand Down
317 changes: 317 additions & 0 deletions backend/scripts/python/env/Populate Projects.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b9b6c5e5-4b20-4407-9542-3bea81ab742e",
"metadata": {},
"source": [
"# Setup\n",
"For dev, you must have the backend api running on your computer. For prod, please change USER_API_URL to reflect the production url."
]
},
{
"cell_type": "code",
"execution_count": 393,
"id": "d04b046c-ad92-4f9b-a7d1-c900c1ff4581",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import os\n",
"import re\n",
"import pprint as pp\n",
"from dotenv import load_dotenv\n",
"from bson.objectid import ObjectId\n",
"from datetime import datetime\n",
"from functools import reduce\n",
"from pymongo import MongoClient, ReturnDocument, UpdateOne\n",
"from pymongo.errors import BulkWriteError\n",
"\n",
"load_dotenv()\n",
"custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")\n",
"DATABASE_URL = os.getenv(\"DATABASE_URL\")"
]
},
{
"cell_type": "markdown",
"id": "852bea67-8354-49df-b6fb-c766f305ee8a",
"metadata": {},
"source": [
"# Connect to database and check current list of DBs"
]
},
{
"cell_type": "code",
"execution_count": 395,
"id": "33d48fca-a40d-4619-b97b-46b598258967",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['backup_db', 'testdb', 'vrms-populate-projects-test', 'vrms-slack-dev', 'vrms-slack-main', 'vrms-slack-staging', 'vrms-test', 'vrms-test-2', 'vrms-test-3', 'vrms-test-4', 'vrms-test-5', 'vrms-test-6', 'vrms-test-clone-project-sync', 'vrms-test-copy', 'vrms-test-sync', 'vrms-user-migration-test', 'admin', 'local']\n"
]
}
],
"source": [
"# Connect to MongoDB\n",
"client = MongoClient(DATABASE_URL)\n",
"print(client.list_database_names())"
]
},
{
"cell_type": "markdown",
"id": "0e4d3414-f130-4e76-9506-efd468d401df",
"metadata": {},
"source": [
"# Create a new test database\n",
"\n",
"Define a source and copy for databases\n"
]
},
{
"cell_type": "code",
"execution_count": 396,
"id": "68a7e8a9-e3f3-4231-8424-8b8dd44f522f",
"metadata": {},
"outputs": [],
"source": [
"db_source = client['vrms-test']\n",
"db_copy = client['vrms-populate-projects-test']"
]
},
{
"cell_type": "markdown",
"id": "6565ea84-e799-40d0-a56b-7859620db461",
"metadata": {},
"source": [
"# Drop all collections in test database (ONLY IF NECESSARY!)\n"
]
},
{
"cell_type": "code",
"execution_count": 405,
"id": "a4cb07f2-3e55-4a2e-8358-96bf67ebf354",
"metadata": {},
"outputs": [],
"source": [
"# for collection_name in db_copy.list_collection_names():\n",
"# db_copy.drop_collection(collection_name)\n",
"# print(f\"Dropped collection: {collection_name}\")"
]
},
{
"cell_type": "markdown",
"id": "141b69ae-a407-4c41-a551-33f547244eb0",
"metadata": {},
"source": [
"# Copy Users and Projects collections from source -> test databases\n"
]
},
{
"cell_type": "code",
"execution_count": 398,
"id": "fd46eb06-d246-455e-8f48-a4e5df0efc9a",
"metadata": {},
"outputs": [],
"source": [
"users_collection = db_source['users']\n",
"users = list(users_collection.find())\n",
"projects_collection = db_source['projects']\n",
"projects = list(projects_collection.find())\n",
"\n",
"users_copy = db_copy['users']\n",
"projects_copy = db_copy['projects']\n",
"\n",
"try:\n",
" users_copy.insert_many(users, ordered=False) # Copy source db users to test db users\n",
" projects_copy.insert_many(projects, ordered=False) # Copy source db projects to test db projects\n",
"except BulkWriteError as bwe:\n",
" print(\"BulkWriteError details:\")\n",
" print(bwe.details) # This contains info on which documents failed and why"
]
},
{
"cell_type": "markdown",
"id": "0c8b8712-7654-4f42-96c2-3809d33d214a",
"metadata": {},
"source": [
"# Get Users with at least one managedProjects\n",
"\n",
"Retrieve a list of all users with at least one managedProject.\n"
]
},
{
"cell_type": "code",
"execution_count": 399,
"id": "d4f52891-72c0-440c-8ef1-0f2102cebdb1",
"metadata": {},
"outputs": [],
"source": [
"query = {\n",
" \"managedProjects\": { \n",
" \"$exists\": True, \n",
" \"$not\": { \"$size\": 0 } \n",
" }\n",
"}\n",
"\n",
"target_users = list(users_copy.find(query))"
]
},
{
"cell_type": "markdown",
"id": "de61c365-ec09-4acf-b863-221067f988db",
"metadata": {},
"source": [
"# Create an dictionary called `projects_users`\n",
"\n",
"The dict has project IDs as keys and arrays of user IDs as values\n"
]
},
{
"cell_type": "code",
"execution_count": 400,
"id": "dd384405-c9bc-4b00-bb9b-8dcd4be0e9ba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'68a3e64ee2653c001fe3ff3b': [ObjectId('6481155fab091f001e30925b'),\n",
" ObjectId('66024c13e6a0050028e07948'),\n",
" ObjectId('670dd397cace6a002abb20ce')],\n",
" '68a3e75ea19d60385b3938f8': [ObjectId('670dd397cace6a002abb20ce')]}\n"
]
}
],
"source": [
"projects_users = {}\n",
"\n",
"# Function to filter only projects with valid mongoose IDs\n",
"def filter_valid_mongoose_ids(id_list):\n",
" return [x for x in id_list if ObjectId.is_valid(x)]\n",
"\n",
"for user in target_users:\n",
" # Destructure id and managed projects from user\n",
" _id, managed_projects = user['_id'], user['managedProjects']\n",
"\n",
" # Filter projects\n",
" filtered_projects = filter_valid_mongoose_ids(managed_projects)\n",
"\n",
" for proj_id in filtered_projects:\n",
" if proj_id in projects_users:\n",
" projects_users[f\"{proj_id}\"].append(_id)\n",
" else:\n",
" projects_users[f\"{proj_id}\"] = [_id]\n",
"\n",
"pp.pprint(projects_users)"
]
},
{
"cell_type": "markdown",
"id": "a34d198a-ce32-41af-b4e2-2be590a6f5a6",
"metadata": {},
"source": [
"# Update `managedByUsers` field in Projects \n",
"\n",
"Update all project's `managedByUsers` array using bulk write"
]
},
{
"cell_type": "code",
"execution_count": 404,
"id": "f280d029-47ed-46ef-a8d1-731071600a49",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project before update:\n",
"{'__v': 0,\n",
" '_id': ObjectId('68a3e64ee2653c001fe3ff3b'),\n",
" 'createdDate': datetime.datetime(2025, 8, 19, 2, 49, 50, 843000),\n",
" 'description': 'Testing...',\n",
" 'githubIdentifier': 'lkjlkj',\n",
" 'githubUrl': 'lkjlk',\n",
" 'googleDriveUrl': 'https://drive.google.com/drive/folders/1hAq0wyZKOaZLujqOYiaFv5PYgooISger?usp=drive_link',\n",
" 'hflaWebsiteUrl': 'lkjlkj',\n",
" 'managedByUsers': [ObjectId('6481155fab091f001e30925b'),\n",
" ObjectId('66024c13e6a0050028e07948'),\n",
" ObjectId('670dd397cace6a002abb20ce')],\n",
" 'name': 'Jacks Test Project',\n",
" 'partners': [],\n",
" 'projectStatus': 'Active',\n",
" 'recruitingCategories': [],\n",
" 'slackUrl': 'lkjlkj'}\n",
"Project before update:\n",
"{'__v': 0,\n",
" '_id': ObjectId('68a3e75ea19d60385b3938f8'),\n",
" 'createdDate': datetime.datetime(2025, 8, 19, 2, 54, 22, 871000),\n",
" 'description': 'afk',\n",
" 'githubIdentifier': 'afk',\n",
" 'githubUrl': 'afk',\n",
" 'googleDriveUrl': 'https://drive.google.com/test',\n",
" 'hflaWebsiteUrl': 'afk',\n",
" 'managedByUsers': [ObjectId('670dd397cace6a002abb20ce')],\n",
" 'name': 'VRMS Test Project',\n",
" 'partners': [],\n",
" 'projectStatus': 'Active',\n",
" 'recruitingCategories': [],\n",
" 'slackUrl': 'afk'}\n",
"Result: BulkWriteResult({'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 2, 'nModified': 0, 'nRemoved': 0, 'upserted': []}, acknowledged=True)\n"
]
}
],
"source": [
"operations = []\n",
"\n",
"for proj_id, user_ids in projects_users.items():\n",
" valid_user_ids = [uid for uid in user_ids if ObjectId.is_valid(uid)] \n",
"\n",
" proj = projects_copy.find_one({\"_id\": ObjectId(proj_id)})\n",
"\n",
" if proj:\n",
" print('Project before update:')\n",
" pp.pprint(proj)\n",
" \n",
" # Compile individual updates in operations \n",
" operations.append(UpdateOne(\n",
" {\"_id\": ObjectId(proj_id)}, # Filter\n",
" {\"$set\": {\"managedByUsers\": valid_user_ids}}, # Update\n",
" ))\n",
" else:\n",
" print(f\"No project with {proj_id} found\")\n",
"\n",
"# Execute the bulk write to update operations\n",
"result = projects_copy.bulk_write(operations)\n",
"\n",
"print(f\"Result: \", result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading