Bug Fix: Inconsistent Field Naming - avatar vs imageUrl #89
Changes from all commits: c7c8ec2, 10f0d3a, 119d7f8, 62433da, 5412422, d28f15e
`backend/scripts/backup_db.py` (new file, +81 lines):

```python
| """ | ||||||||||||||||||||||||
| Database backup script for Splitwiser. | ||||||||||||||||||||||||
| Creates a backup of all collections before performing migrations. | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| import json | ||||||||||||||||||||||||
| import os | ||||||||||||||||||||||||
| from datetime import datetime | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| from dotenv import load_dotenv | ||||||||||||||||||||||||
| from pymongo import MongoClient | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Get the script's directory and backend directory | ||||||||||||||||||||||||
| SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||||||||||||||||||||||
| BACKEND_DIR = os.path.dirname(SCRIPT_DIR) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Load environment variables from .env file in backend directory | ||||||||||||||||||||||||
| load_dotenv(os.path.join(BACKEND_DIR, ".env")) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Get MongoDB connection details from environment | ||||||||||||||||||||||||
| MONGODB_URL = os.getenv("MONGODB_URL") | ||||||||||||||||||||||||
| DATABASE_NAME = os.getenv("DATABASE_NAME") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def create_backup(): | ||||||||||||||||||||||||
| """Create a backup of all collections.""" | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| # Create backup directory if it doesn't exist | ||||||||||||||||||||||||
| backup_dir = "backups" | ||||||||||||||||||||||||
| backup_time = datetime.now().strftime("%Y%m%d_%H%M%S") | ||||||||||||||||||||||||
| backup_path = os.path.join(backup_dir, f"backup_{backup_time}") | ||||||||||||||||||||||||
| os.makedirs(backup_path, exist_ok=True) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Connect to MongoDB | ||||||||||||||||||||||||
| client = MongoClient(MONGODB_URL) | ||||||||||||||||||||||||
| db = client[DATABASE_NAME] | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Get all collections | ||||||||||||||||||||||||
| collections = db.list_collection_names() | ||||||||||||||||||||||||
| backup_stats = {} | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| for collection_name in collections: | ||||||||||||||||||||||||
| collection = db[collection_name] | ||||||||||||||||||||||||
| documents = list(collection.find({})) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Convert ObjectId to string for JSON serialization | ||||||||||||||||||||||||
| for doc in documents: | ||||||||||||||||||||||||
| doc["_id"] = str(doc["_id"]) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
**coderabbitai[bot] (Contributor) commented on lines +46 to +49** — 🛠️ Refactor suggestion: **Potential memory issue with large collections.** Using `list(collection.find({}))` loads the entire collection into memory at once, which can exhaust available RAM on large collections. Consider using a pagination or streaming approach:

```diff
-            documents = list(collection.find({}))
-
-            # Convert ObjectId to string for JSON serialization
-            for doc in documents:
-                doc["_id"] = str(doc["_id"])
+            documents = []
+            batch_size = 1000
+            cursor = collection.find({}).batch_size(batch_size)
+            for doc in cursor:
+                doc["_id"] = str(doc["_id"])
+                documents.append(doc)
```
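A batched cursor bounds each network round-trip but still accumulates the full `documents` list; a variant that keeps memory flat would stream each document straight to the backup file instead. A minimal sketch of that idea (the helper name is hypothetical, not part of this PR):

```python
import json


def backup_collection_streaming(collection, backup_file, batch_size=1000):
    """Write a collection to a JSON array on disk without holding all docs in memory."""
    count = 0
    with open(backup_file, "w") as f:
        f.write("[\n")
        for doc in collection.find({}).batch_size(batch_size):
            doc["_id"] = str(doc["_id"])  # keep the script's ObjectId-to-string convention
            if count:
                f.write(",\n")
            json.dump(doc, f, default=str)
            count += 1
        f.write("\n]\n")
    return count
```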
```python
            # Save to file
            backup_file = os.path.join(backup_path, f"{collection_name}.json")
            with open(backup_file, "w") as f:
                json.dump(documents, f, indent=2, default=str)

            backup_stats[collection_name] = len(documents)

        # Save backup metadata
        metadata = {
            "timestamp": datetime.now().isoformat(),
            "database": DATABASE_NAME,
            "collections": backup_stats,
            "total_documents": sum(backup_stats.values()),
        }

        with open(os.path.join(backup_path, "backup_metadata.json"), "w") as f:
            json.dump(metadata, f, indent=2)

        return backup_path, metadata

    except Exception as e:
        print(f"Backup failed: {str(e)}")
        raise
```
**coderabbitai[bot] (Contributor) commented on lines +70 to +72** — 🛠️ Refactor suggestion: **Enhance error handling and cleanup.** The error handling prints the failure but doesn't clean up partial backup files, so a failed run leaves an incomplete backup directory behind:

```diff
     except Exception as e:
         print(f"Backup failed: {str(e)}")
+        # Clean up partial backup on failure
+        if 'backup_path' in locals() and os.path.exists(backup_path):
+            import shutil
+            shutil.rmtree(backup_path, ignore_errors=True)
+            print(f"Cleaned up partial backup at: {backup_path}")
         raise
```
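An alternative that avoids cleanup logic altogether (a sketch under assumed names, not part of the PR) is to write the dump into a temporary directory and only move it to its final path once every collection has been written, so a failed run never publishes a partial backup:

```python
import shutil
import tempfile


def write_backup_atomically(dump_collections, backup_path):
    """Dump into a temp dir, then move it into place only on success."""
    tmp_dir = tempfile.mkdtemp(prefix="backup_tmp_")
    try:
        dump_collections(tmp_dir)  # callback that writes all the *.json files
        shutil.move(tmp_dir, backup_path)  # publish the completed backup
    except Exception:
        shutil.rmtree(tmp_dir, ignore_errors=True)  # discard partial output
        raise
```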
```python
if __name__ == "__main__":
    backup_path, metadata = create_backup()
    print(f"Backup created successfully at: {backup_path}")
    print("\nBackup statistics:")
    print(f"Total documents: {metadata['total_documents']}")
    for coll, count in metadata["collections"].items():
        print(f"{coll}: {count} documents")
```
Migration script (new file, +167 lines):
| """ | ||
| Migration script to standardize user avatar fields to imageUrl. | ||
| This script: | ||
| 1. Identifies users with avatar field but no imageUrl field | ||
| 2. Copies avatar values to imageUrl field | ||
| 3. Removes the deprecated avatar field | ||
| 4. Logs migration statistics | ||
| """ | ||
|
|
||
| import json | ||
| import logging | ||
| import os | ||
| import sys | ||
| from datetime import datetime | ||
|
|
||
| from backup_db import create_backup | ||
| from bson import ObjectId | ||
| from dotenv import load_dotenv | ||
| from pymongo import MongoClient, UpdateOne | ||
|
|
||
| # Add the script's directory to Python path | ||
| SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
| sys.path.append(SCRIPT_DIR) | ||
|
|
||
|
|
||
| # Load environment variables from the backend directory | ||
| BACKEND_DIR = os.path.dirname(SCRIPT_DIR) | ||
| load_dotenv(os.path.join(BACKEND_DIR, ".env")) | ||
|
|
||
| # Get MongoDB connection details from environment | ||
| MONGODB_URL = os.getenv("MONGODB_URL") | ||
| DATABASE_NAME = os.getenv("DATABASE_NAME") | ||

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up file logging
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(
    log_dir, f"migration_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
logger.addHandler(file_handler)

# Validate required environment variables
if not MONGODB_URL:
    logger.error("MONGODB_URL environment variable is required")
    sys.exit(1)
if not DATABASE_NAME:
    logger.error("DATABASE_NAME environment variable is required")
    sys.exit(1)


def migrate_avatar_to_imageurl():
    """
    Migrate avatar field to imageUrl in users collection.
    Returns statistics about the migration.
    """
    try:
        # First create a backup
        logger.info("Creating database backup...")
        backup_path, backup_metadata = create_backup()
        logger.info(f"Backup created at: {backup_path}")

        # Connect to MongoDB
        client = MongoClient(MONGODB_URL)
        db = client[DATABASE_NAME]
        users = db.users

        # Find users with avatar field
        users_with_avatar = users.find({"avatar": {"$exists": True}})
        users_to_update = []
        stats = {
            "total_users": users.count_documents({}),
            "users_with_avatar": 0,
            "users_with_both_fields": 0,
            "users_updated": 0,
            "conflicts": 0,
        }

        for user in users_with_avatar:
            stats["users_with_avatar"] += 1

            # Check for conflicts (users with both fields)
            if "imageUrl" in user and user["imageUrl"] is not None:
                if user["imageUrl"] != user["avatar"]:
                    logger.warning(
                        f"Conflict found for user {user['_id']}: "
                        f"avatar='{user['avatar']}', imageUrl='{user['imageUrl']}'"
                    )
                    stats["conflicts"] += 1
                    continue
                stats["users_with_both_fields"] += 1

            # Prepare update
            users_to_update.append(
                UpdateOne(
                    {"_id": user["_id"]},
                    {"$set": {"imageUrl": user["avatar"]}, "$unset": {"avatar": ""}},
                )
            )

        # Perform bulk update if there are users to update
        if users_to_update:
            result = users.bulk_write(users_to_update)
            stats["users_updated"] = result.modified_count
            logger.info(f"Successfully updated {result.modified_count} users")

        return stats

    except Exception as e:
        logger.error(f"Migration failed: {str(e)}")
        raise


def rollback_migration(backup_path):
    """
    Rollback the migration using a specified backup.
    """
    try:
        client = MongoClient(MONGODB_URL)
        db = client[DATABASE_NAME]

        backup_file_path = os.path.join(backup_path, "users.json")
        if not os.path.exists(backup_file_path):
            raise FileNotFoundError(f"Backup file not found: {backup_file_path}")

        # Read users collection backup
        with open(backup_file_path, "r") as f:
            users_backup = json.load(f)

        # Convert string IDs back to ObjectId
        for user in users_backup:
            user["_id"] = ObjectId(user["_id"])

        # Replace current users collection with backup
        db.users.drop()
        if users_backup:
            db.users.insert_many(users_backup)

        logger.info(f"Successfully rolled back to backup: {backup_path}")
        return True

    except Exception as e:
        logger.error(f"Rollback failed: {str(e)}")
        raise


if __name__ == "__main__":
    logger.info("Starting avatar to imageUrl migration...")
    stats = migrate_avatar_to_imageurl()

    logger.info("\nMigration completed. Statistics:")
    logger.info(f"Total users: {stats['total_users']}")
    logger.info(f"Users with avatar field: {stats['users_with_avatar']}")
    logger.info(f"Users with both fields: {stats['users_with_both_fields']}")
    logger.info(f"Users updated: {stats['users_updated']}")
    logger.info(f"Conflicts found: {stats['conflicts']}")

    print("\nMigration completed. Check the log file for details:", log_file)
```
**coderabbitai[bot] (💡 Verification agent) — Add validation for required environment variables.** The script relies on the `MONGODB_URL` and `DATABASE_NAME` environment variables but doesn't validate their presence before proceeding, so a missing variable surfaces as a less helpful error downstream. In `backend/scripts/backup_db.py`, around lines 17–22 (after loading env vars), explicitly check and raise an informative exception:

```diff
 load_dotenv(os.path.join(BACKEND_DIR, ".env"))

 # Get MongoDB connection details from environment
 MONGODB_URL = os.getenv("MONGODB_URL")
 DATABASE_NAME = os.getenv("DATABASE_NAME")
+if not MONGODB_URL or not DATABASE_NAME:
+    missing = []
+    if not MONGODB_URL:
+        missing.append("MONGODB_URL")
+    if not DATABASE_NAME:
+        missing.append("DATABASE_NAME")
+    raise EnvironmentError(
+        f"Missing required environment variable(s): {', '.join(missing)}"
+    )
```

This ensures the script fails fast with a clear message when required configuration is absent.
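Since the migration script duplicates this check with its own `if not ...: sys.exit(1)` pair, a shared helper could serve both scripts; a sketch (the `require_env` name is hypothetical, not part of the PR):

```python
import os


def require_env(*names):
    """Return the requested environment variables, failing fast if any are missing."""
    values = {name: os.getenv(name) for name in names}
    missing = [name for name, value in values.items() if not value]
    if missing:
        raise EnvironmentError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )
    return values


# Usage in either script:
# cfg = require_env("MONGODB_URL", "DATABASE_NAME")
# MONGODB_URL, DATABASE_NAME = cfg["MONGODB_URL"], cfg["DATABASE_NAME"]
```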