diff --git a/backend/scripts/.gitignore b/backend/scripts/.gitignore new file mode 100644 index 000000000..801650518 --- /dev/null +++ b/backend/scripts/.gitignore @@ -0,0 +1,30 @@ +# Jupyter Notebook checkpoints +.ipynb_checkpoints/ + +# Jupyter runtime files +.jupyter/ + +# Hidden notebook state (metadata, autosaves) +*.nbconvert.ipynb +*.nbconvert/ + +# VSCode / PyCharm or other IDE junk (optional, but useful) +.vscode/ +.idea/ + +# Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd +*.pkl + +# OS files +.DS_Store +Thumbs.db + +# Virtual environments +venv/ + +# Additional text files +adjustedreqs.txt \ No newline at end of file diff --git a/backend/scripts/python/env/.gitignore b/backend/scripts/python/env/.gitignore deleted file mode 100644 index f514b74c5..000000000 --- a/backend/scripts/python/env/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Created by venv; see https://docs.python.org/3/library/venv.html -* diff --git a/backend/scripts/python/env/Duplicate Removal.ipynb b/backend/scripts/python/env/Duplicate Removal.ipynb index 847f9e455..5e59938a9 100644 --- a/backend/scripts/python/env/Duplicate Removal.ipynb +++ b/backend/scripts/python/env/Duplicate Removal.ipynb @@ -397,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.1" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/backend/scripts/python/env/Populate Projects.ipynb b/backend/scripts/python/env/Populate Projects.ipynb new file mode 100644 index 000000000..36fd748cf --- /dev/null +++ b/backend/scripts/python/env/Populate Projects.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b9b6c5e5-4b20-4407-9542-3bea81ab742e", + "metadata": {}, + "source": [ + "# Setup\n", + "This notebook connects to MongoDB directly through `DATABASE_URL`, which is loaded from your `.env` file. For dev, point `DATABASE_URL` at a development database. For prod, change `DATABASE_URL` to the production connection string." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 393, + "id": "d04b046c-ad92-4f9b-a7d1-c900c1ff4581", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import os\n", + "import re\n", + "import pprint as pp\n", + "from dotenv import load_dotenv\n", + "from bson.objectid import ObjectId\n", + "from datetime import datetime\n", + "from functools import reduce\n", + "from pymongo import MongoClient, ReturnDocument, UpdateOne\n", + "from pymongo.errors import BulkWriteError\n", + "\n", + "load_dotenv()\n", + "custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")\n", + "DATABASE_URL = os.getenv(\"DATABASE_URL\")" + ] + }, + { + "cell_type": "markdown", + "id": "852bea67-8354-49df-b6fb-c766f305ee8a", + "metadata": {}, + "source": [ + "# Connect to database and check current list of DBs" + ] + }, + { + "cell_type": "code", + "execution_count": 395, + "id": "33d48fca-a40d-4619-b97b-46b598258967", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['backup_db', 'testdb', 'vrms-populate-projects-test', 'vrms-slack-dev', 'vrms-slack-main', 'vrms-slack-staging', 'vrms-test', 'vrms-test-2', 'vrms-test-3', 'vrms-test-4', 'vrms-test-5', 'vrms-test-6', 'vrms-test-clone-project-sync', 'vrms-test-copy', 'vrms-test-sync', 'vrms-user-migration-test', 'admin', 'local']\n" + ] + } + ], + "source": [ + "# Connect to MongoDB\n", + "client = MongoClient(DATABASE_URL)\n", + "print(client.list_database_names())" + ] + }, + { + "cell_type": "markdown", + "id": "0e4d3414-f130-4e76-9506-efd468d401df", + "metadata": {}, + "source": [ + "# Create a new test database\n", + "\n", + "Define the source database and the test database that will receive the copy\n" + ] + }, + { + "cell_type": "code", + "execution_count": 396, + "id": "68a7e8a9-e3f3-4231-8424-8b8dd44f522f", + "metadata": {}, + "outputs": [], + "source": [ + "db_source = client['vrms-test']\n", + "db_copy = client['vrms-populate-projects-test']" + ] + }, + { + 
"cell_type": "markdown", + "id": "6565ea84-e799-40d0-a56b-7859620db461", + "metadata": {}, + "source": [ + "# Drop all collections in the test database (ONLY IF NECESSARY!)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 405, + "id": "a4cb07f2-3e55-4a2e-8358-96bf67ebf354", + "metadata": {}, + "outputs": [], + "source": [ + "# for collection_name in db_copy.list_collection_names():\n", + "# db_copy.drop_collection(collection_name)\n", + "# print(f\"Dropped collection: {collection_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "141b69ae-a407-4c41-a551-33f547244eb0", + "metadata": {}, + "source": [ + "# Copy Users and Projects collections from source -> test databases\n" + ] + }, + { + "cell_type": "code", + "execution_count": 398, + "id": "fd46eb06-d246-455e-8f48-a4e5df0efc9a", + "metadata": {}, + "outputs": [], + "source": [ + "users_collection = db_source['users']\n", + "users = list(users_collection.find())\n", + "projects_collection = db_source['projects']\n", + "projects = list(projects_collection.find())\n", + "\n", + "users_copy = db_copy['users']\n", + "projects_copy = db_copy['projects']\n", + "\n", + "try:\n", + " users_copy.insert_many(users, ordered=False) # Copy source db users to test db users\n", + " projects_copy.insert_many(projects, ordered=False) # Copy source db projects to test db projects\n", + "except BulkWriteError as bwe:\n", + " print(\"BulkWriteError details:\")\n", + " print(bwe.details) # This contains info on which documents failed and why" + ] + }, + { + "cell_type": "markdown", + "id": "0c8b8712-7654-4f42-96c2-3809d33d214a", + "metadata": {}, + "source": [ + "# Get users with at least one entry in `managedProjects`\n", + "\n", + "Retrieve all users whose `managedProjects` field exists and is not an empty array.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 399, + "id": "d4f52891-72c0-440c-8ef1-0f2102cebdb1", + "metadata": {}, + "outputs": [], + "source": [ + "query = {\n", + " \"managedProjects\": { \n", + " \"$exists\": True, \n", + 
" \"$not\": { \"$size\": 0 } \n", + " }\n", + "}\n", + "\n", + "target_users = list(users_copy.find(query))" + ] + }, + { + "cell_type": "markdown", + "id": "de61c365-ec09-4acf-b863-221067f988db", + "metadata": {}, + "source": [ + "# Create a dictionary called `projects_users`\n", + "\n", + "The dict has project ID strings as keys and lists of user IDs as values\n" + ] + }, + { + "cell_type": "code", + "execution_count": 400, + "id": "dd384405-c9bc-4b00-bb9b-8dcd4be0e9ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'68a3e64ee2653c001fe3ff3b': [ObjectId('6481155fab091f001e30925b'),\n", + " ObjectId('66024c13e6a0050028e07948'),\n", + " ObjectId('670dd397cace6a002abb20ce')],\n", + " '68a3e75ea19d60385b3938f8': [ObjectId('670dd397cace6a002abb20ce')]}\n" + ] + } + ], + "source": [ + "projects_users = {}\n", + "\n", + "# Function to filter only projects with valid mongoose IDs\n", + "def filter_valid_mongoose_ids(id_list):\n", + " return [x for x in id_list if ObjectId.is_valid(x)]\n", + "\n", + "for user in target_users:\n", + " # Destructure id and managed projects from user\n", + " _id, managed_projects = user['_id'], user['managedProjects']\n", + "\n", + " # Filter projects\n", + " filtered_projects = filter_valid_mongoose_ids(managed_projects)\n", + "\n", + " for proj_id in filtered_projects:\n", + " if proj_id in projects_users:\n", + " projects_users[f\"{proj_id}\"].append(_id)\n", + " else:\n", + " projects_users[f\"{proj_id}\"] = [_id]\n", + "\n", + "pp.pprint(projects_users)" + ] + }, + { + "cell_type": "markdown", + "id": "a34d198a-ce32-41af-b4e2-2be590a6f5a6", + "metadata": {}, + "source": [ + "# Update `managedByUsers` field in Projects \n", + "\n", + "Update each project's `managedByUsers` array using a single bulk write" + ] + }, + { + "cell_type": "code", + "execution_count": 404, + "id": "f280d029-47ed-46ef-a8d1-731071600a49", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Project before update:\n", + "{'__v': 0,\n", + " '_id': ObjectId('68a3e64ee2653c001fe3ff3b'),\n", + " 'createdDate': datetime.datetime(2025, 8, 19, 2, 49, 50, 843000),\n", + " 'description': 'Testing...',\n", + " 'githubIdentifier': 'lkjlkj',\n", + " 'githubUrl': 'lkjlk',\n", + " 'googleDriveUrl': 'https://drive.google.com/drive/folders/1hAq0wyZKOaZLujqOYiaFv5PYgooISger?usp=drive_link',\n", + " 'hflaWebsiteUrl': 'lkjlkj',\n", + " 'managedByUsers': [ObjectId('6481155fab091f001e30925b'),\n", + " ObjectId('66024c13e6a0050028e07948'),\n", + " ObjectId('670dd397cace6a002abb20ce')],\n", + " 'name': 'Jacks Test Project',\n", + " 'partners': [],\n", + " 'projectStatus': 'Active',\n", + " 'recruitingCategories': [],\n", + " 'slackUrl': 'lkjlkj'}\n", + "Project before update:\n", + "{'__v': 0,\n", + " '_id': ObjectId('68a3e75ea19d60385b3938f8'),\n", + " 'createdDate': datetime.datetime(2025, 8, 19, 2, 54, 22, 871000),\n", + " 'description': 'afk',\n", + " 'githubIdentifier': 'afk',\n", + " 'githubUrl': 'afk',\n", + " 'googleDriveUrl': 'https://drive.google.com/test',\n", + " 'hflaWebsiteUrl': 'afk',\n", + " 'managedByUsers': [ObjectId('670dd397cace6a002abb20ce')],\n", + " 'name': 'VRMS Test Project',\n", + " 'partners': [],\n", + " 'projectStatus': 'Active',\n", + " 'recruitingCategories': [],\n", + " 'slackUrl': 'afk'}\n", + "Result: BulkWriteResult({'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 2, 'nModified': 0, 'nRemoved': 0, 'upserted': []}, acknowledged=True)\n" + ] + } + ], + "source": [ + "operations = []\n", + "\n", + "for proj_id, user_ids in projects_users.items():\n", + " valid_user_ids = [uid for uid in user_ids if ObjectId.is_valid(uid)] \n", + "\n", + " proj = projects_copy.find_one({\"_id\": ObjectId(proj_id)})\n", + "\n", + " if proj:\n", + " print('Project before update:')\n", + " pp.pprint(proj)\n", + " \n", + " # Compile individual updates in operations \n", + " operations.append(UpdateOne(\n", + " 
{\"_id\": ObjectId(proj_id)}, # Filter\n", + " {\"$set\": {\"managedByUsers\": valid_user_ids}}, # Update\n", + " ))\n", + " else:\n", + " print(f\"No project with {proj_id} found\")\n", + "\n", + "# Execute the bulk write to update operations\n", + "result = projects_copy.bulk_write(operations)\n", + "\n", + "print(f\"Result: \", result)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/backend/scripts/python/env/README.md b/backend/scripts/python/env/README.md index 83fb65def..76ad4feda 100644 --- a/backend/scripts/python/env/README.md +++ b/backend/scripts/python/env/README.md @@ -22,6 +22,12 @@ From within the `scripts` directory, run the following command to create a virtu python -m venv . ``` +or + +``` +python3 -m venv venv +``` + This will create a virtual environment within the current directory. ### 2. Activate the Virtual Environment @@ -29,13 +35,19 @@ This will create a virtual environment within the current directory. Once the virtual environment is created, you'll need to activate it. - On **Windows**, run: - + ``` .\Scripts\activate ``` + If using **Ubuntu**, run: + + ``` + source venv/bin/activate + ``` + - On **MacOS/Linux**, run: - + ``` source bin/activate ``` @@ -66,4 +78,4 @@ The `.gitignore` file in this directory is set to ignore all files, including th git add -f .\backend\scripts\python\env\your-file.file ``` -where -f forces git to add and begin tracking that file. \ No newline at end of file +where -f forces git to add and begin tracking that file.