Merged
2 changes: 1 addition & 1 deletion backend/app/auth/schemas.py
@@ -42,7 +42,7 @@ class UserResponse(BaseModel):
     id: str = Field(alias="_id")
     email: str
     name: str
-    avatar: Optional[str] = None
+    imageUrl: Optional[str] = None
     currency: str = "USD"
     created_at: datetime

8 changes: 4 additions & 4 deletions backend/app/auth/service.py
@@ -115,7 +115,7 @@ async def create_user_with_email(
"email": email,
"hashed_password": get_password_hash(password),
"name": name,
"avatar": None,
"imageUrl": None,
"currency": "USD",
"created_at": datetime.now(timezone.utc),
"auth_provider": "email",
@@ -202,8 +202,8 @@ async def authenticate_with_google(self, id_token: str) -> Dict[str, Any]:
         update_data = {}
         if user.get("firebase_uid") != firebase_uid:
             update_data["firebase_uid"] = firebase_uid
-        if user.get("avatar") != picture and picture:
-            update_data["avatar"] = picture
+        if user.get("imageUrl") != picture and picture:
+            update_data["imageUrl"] = picture
 
         if update_data:
             await db.users.update_one(
@@ -215,7 +215,7 @@ async def authenticate_with_google(self, id_token: str) -> Dict[str, Any]:
             user_doc = {
                 "email": email,
                 "name": name,
-                "avatar": picture,
+                "imageUrl": picture,
                 "currency": "USD",
                 "created_at": datetime.now(timezone.utc),
                 "auth_provider": "google",
10 changes: 3 additions & 7 deletions backend/app/groups/service.py
@@ -54,17 +54,13 @@ async def _enrich_members_with_user_details(
                         if user
                         else f"{member_user_id}@example.com"
                     ),
-                    "avatar": (
-                        user.get("imageUrl") or user.get("avatar")
-                        if user
-                        else None
-                    ),
+                    "imageUrl": (user.get("imageUrl") if user else None),
                 }
                 if user
                 else {
                     "name": f"User {member_user_id[-4:]}",
                     "email": f"{member_user_id}@example.com",
-                    "avatar": None,
+                    "imageUrl": None,
                 }
             ),
         }
@@ -79,7 +75,7 @@ async def _enrich_members_with_user_details(
"user": {
"name": f"User {member_user_id[-4:]}",
"email": f"{member_user_id}@example.com",
"avatar": None,
"imageUrl": None,
},
}
)
2 changes: 1 addition & 1 deletion backend/app/user/service.py
@@ -41,7 +41,7 @@ def iso(dt):
"id": user_id,
"name": user.get("name"),
"email": user.get("email"),
"imageUrl": user.get("imageUrl") or user.get("avatar"),
"imageUrl": user.get("imageUrl"),
"currency": user.get("currency", "USD"),
"createdAt": iso(user.get("created_at")),
"updatedAt": iso(user.get("updated_at") or user.get("created_at")),
81 changes: 81 additions & 0 deletions backend/scripts/backup_db.py
@@ -0,0 +1,81 @@
"""
Database backup script for Splitwiser.
Creates a backup of all collections before performing migrations.
"""

import json
import os
from datetime import datetime

from dotenv import load_dotenv
from pymongo import MongoClient

# Get the script's directory and backend directory
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
BACKEND_DIR = os.path.dirname(SCRIPT_DIR)

# Load environment variables from .env file in backend directory
load_dotenv(os.path.join(BACKEND_DIR, ".env"))

# Get MongoDB connection details from environment
MONGODB_URL = os.getenv("MONGODB_URL")
DATABASE_NAME = os.getenv("DATABASE_NAME")
Comment on lines +21 to +22
💡 Verification agent

🧩 Analysis chain

Verify MongoDB connection environment variables.

The script relies on MONGODB_URL and DATABASE_NAME environment variables but doesn't validate their presence before proceeding.


🏁 Script executed:

#!/bin/bash
# Check if the environment variables are properly handled in other parts of the codebase
rg -A 3 "MONGODB_URL.*DATABASE_NAME" --type py

Length of output: 48


🏁 Script executed:

#!/bin/bash
# Display the first 100 lines of the backup_db.py script to check env var handling
head -n 100 backend/scripts/backup_db.py

Length of output: 2618


Add validation for required environment variables

The script loads MONGODB_URL and DATABASE_NAME but will fail with a less-helpful error if they aren't set. Before using them, explicitly check and raise an informative exception.

• File: backend/scripts/backup_db.py
• Around lines 17–22 (after loading env vars)

Suggested change:

 load_dotenv(os.path.join(BACKEND_DIR, ".env"))

 # Get MongoDB connection details from environment
 MONGODB_URL = os.getenv("MONGODB_URL")
 DATABASE_NAME = os.getenv("DATABASE_NAME")

+if not MONGODB_URL or not DATABASE_NAME:
+    missing = []
+    if not MONGODB_URL:
+        missing.append("MONGODB_URL")
+    if not DATABASE_NAME:
+        missing.append("DATABASE_NAME")
+    raise EnvironmentError(
+        f"Missing required environment variable(s): {', '.join(missing)}"
+    )

This ensures the script fails fast with a clear message when required configuration is absent.


def create_backup():
    """Create a backup of all collections."""
    try:
        # Create backup directory if it doesn't exist
        backup_dir = "backups"
        backup_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = os.path.join(backup_dir, f"backup_{backup_time}")
        os.makedirs(backup_path, exist_ok=True)

        # Connect to MongoDB
        client = MongoClient(MONGODB_URL)
        db = client[DATABASE_NAME]

        # Get all collections
        collections = db.list_collection_names()
        backup_stats = {}

        for collection_name in collections:
            collection = db[collection_name]
            documents = list(collection.find({}))

            # Convert ObjectId to string for JSON serialization
            for doc in documents:
                doc["_id"] = str(doc["_id"])

Comment on lines +46 to +49
🛠️ Refactor suggestion

Potential memory issue with large collections.

Using list(collection.find({})) loads all documents into memory at once, which could cause memory issues for large collections.

Consider using pagination or streaming approach:

-            documents = list(collection.find({}))
-
-            # Convert ObjectId to string for JSON serialization
-            for doc in documents:
-                doc["_id"] = str(doc["_id"])
+            documents = []
+            batch_size = 1000
+            cursor = collection.find({}).batch_size(batch_size)
+            for doc in cursor:
+                doc["_id"] = str(doc["_id"])
+                documents.append(doc)

Committable suggestion skipped: line range outside the PR's diff.


            # Save to file
            backup_file = os.path.join(backup_path, f"{collection_name}.json")
            with open(backup_file, "w") as f:
                json.dump(documents, f, indent=2, default=str)

            backup_stats[collection_name] = len(documents)

        # Save backup metadata
        metadata = {
            "timestamp": datetime.now().isoformat(),
            "database": DATABASE_NAME,
            "collections": backup_stats,
            "total_documents": sum(backup_stats.values()),
        }

        with open(os.path.join(backup_path, "backup_metadata.json"), "w") as f:
            json.dump(metadata, f, indent=2)

        return backup_path, metadata

    except Exception as e:
        print(f"Backup failed: {str(e)}")
        raise
Comment on lines +70 to +72
🛠️ Refactor suggestion

Enhance error handling and cleanup.

The error handler prints the exception but doesn't clean up partial backup files on failure.

    except Exception as e:
        print(f"Backup failed: {str(e)}")
+        # Clean up partial backup on failure
+        if 'backup_path' in locals() and os.path.exists(backup_path):
+            import shutil
+            shutil.rmtree(backup_path, ignore_errors=True)
+            print(f"Cleaned up partial backup at: {backup_path}")
        raise


if __name__ == "__main__":
    backup_path, metadata = create_backup()
    print(f"Backup created successfully at: {backup_path}")
    print("\nBackup statistics:")
    print(f"Total documents: {metadata['total_documents']}")
    for coll, count in metadata["collections"].items():
        print(f"{coll}: {count} documents")
167 changes: 167 additions & 0 deletions backend/scripts/migrate_avatar_to_imageurl.py
@@ -0,0 +1,167 @@
"""
Migration script to standardize user avatar fields to imageUrl.
This script:
1. Identifies users with avatar field but no imageUrl field
2. Copies avatar values to imageUrl field
3. Removes the deprecated avatar field
4. Logs migration statistics
"""

import json
import logging
import os
import sys
from datetime import datetime

from bson import ObjectId
from dotenv import load_dotenv
from pymongo import MongoClient, UpdateOne

# Add the script's directory to the Python path so backup_db can be
# imported regardless of the current working directory
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(SCRIPT_DIR)

from backup_db import create_backup  # noqa: E402

# Load environment variables from the backend directory
BACKEND_DIR = os.path.dirname(SCRIPT_DIR)
load_dotenv(os.path.join(BACKEND_DIR, ".env"))

# Get MongoDB connection details from environment
MONGODB_URL = os.getenv("MONGODB_URL")
DATABASE_NAME = os.getenv("DATABASE_NAME")

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up file logging
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(
    log_dir, f"migration_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
logger.addHandler(file_handler)

# Validate required environment variables
if not MONGODB_URL:
    logger.error("MONGODB_URL environment variable is required")
    sys.exit(1)
if not DATABASE_NAME:
    logger.error("DATABASE_NAME environment variable is required")
    sys.exit(1)


def migrate_avatar_to_imageurl():
    """
    Migrate avatar field to imageUrl in users collection.
    Returns statistics about the migration.
    """
    try:
        # First create a backup
        logger.info("Creating database backup...")
        backup_path, backup_metadata = create_backup()
        logger.info(f"Backup created at: {backup_path}")

        # Connect to MongoDB
        client = MongoClient(MONGODB_URL)
        db = client[DATABASE_NAME]
        users = db.users

        # Find users with avatar field
        users_with_avatar = users.find({"avatar": {"$exists": True}})
        users_to_update = []
        stats = {
            "total_users": users.count_documents({}),
            "users_with_avatar": 0,
            "users_with_both_fields": 0,
            "users_updated": 0,
            "conflicts": 0,
        }

        for user in users_with_avatar:
            stats["users_with_avatar"] += 1

            # Check for conflicts (users with both fields)
            if "imageUrl" in user and user["imageUrl"] is not None:
                if user["imageUrl"] != user["avatar"]:
                    logger.warning(
                        f"Conflict found for user {user['_id']}: "
                        f"avatar='{user['avatar']}', imageUrl='{user['imageUrl']}'"
                    )
                    stats["conflicts"] += 1
                    continue
                stats["users_with_both_fields"] += 1

            # Prepare update
            users_to_update.append(
                UpdateOne(
                    {"_id": user["_id"]},
                    {"$set": {"imageUrl": user["avatar"]}, "$unset": {"avatar": ""}},
                )
            )

        # Perform bulk update if there are users to update
        if users_to_update:
            result = users.bulk_write(users_to_update)
            stats["users_updated"] = result.modified_count
            logger.info(f"Successfully updated {result.modified_count} users")

        return stats

    except Exception as e:
        logger.error(f"Migration failed: {str(e)}")
        raise


def rollback_migration(backup_path):
    """
    Rollback the migration using a specified backup.
    """
    try:
        client = MongoClient(MONGODB_URL)
        db = client[DATABASE_NAME]

        backup_file_path = os.path.join(backup_path, "users.json")
        if not os.path.exists(backup_file_path):
            raise FileNotFoundError(f"Backup file not found: {backup_file_path}")

        # Read users collection backup
        with open(backup_file_path, "r") as f:
            users_backup = json.load(f)

        # Convert string IDs back to ObjectId
        for user in users_backup:
            user["_id"] = ObjectId(user["_id"])

        # Replace current users collection with backup
        db.users.drop()
        if users_backup:
            db.users.insert_many(users_backup)

        logger.info(f"Successfully rolled back to backup: {backup_path}")
        return True

    except Exception as e:
        logger.error(f"Rollback failed: {str(e)}")
        raise


if __name__ == "__main__":
    logger.info("Starting avatar to imageUrl migration...")
    stats = migrate_avatar_to_imageurl()

    logger.info("\nMigration completed. Statistics:")
    logger.info(f"Total users: {stats['total_users']}")
    logger.info(f"Users with avatar field: {stats['users_with_avatar']}")
    logger.info(f"Users with both fields: {stats['users_with_both_fields']}")
    logger.info(f"Users updated: {stats['users_updated']}")
    logger.info(f"Conflicts found: {stats['conflicts']}")

    print("\nMigration completed. Check the log file for details:", log_file)
2 changes: 1 addition & 1 deletion backend/tests/auth/test_auth_routes.py
@@ -168,7 +168,7 @@ async def test_login_with_email_success(mock_db):
"email": user_email,
"hashed_password": hashed_password,
"name": "Login User",
"avatar": None,
"imageUrl": None,
"currency": "USD",
# Ensure datetime is used
"created_at": datetime.now(timezone.utc),