Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions backup_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
from github import Github
from models import db, BackupJob
from urllib.parse import urlparse

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -179,22 +180,26 @@ def _get_file_size(self, path):
def verify_github_access(self, repo_url, github_token=None):
"""Verify if we can access a GitHub repository"""
try:
# Extract owner and repo name from URL
if 'github.com/' in repo_url:
parts = repo_url.split('github.com/')[-1].split('/')
if len(parts) >= 2:
owner = parts[0]
repo_name = parts[1].replace('.git', '')

# Parse the URL and check the hostname
parsed = urlparse(repo_url)
if parsed.hostname and parsed.hostname.lower() == "github.com":
# Path is of the form /owner/repo(.git)? or /owner/repo/
Copy link

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hostname check should also validate the scheme to ensure it's HTTPS. URLs with 'http://' scheme or no scheme could be security risks. Consider adding parsed.scheme == 'https' to the condition.

Suggested change
# Path is of the form /owner/repo(.git)? or /owner/repo/
if parsed.scheme == 'https' and parsed.hostname and parsed.hostname.lower() == "github.com":
# Path is of the form /owner/repo(.git)? or /owner/repo/

Copilot uses AI. Check for mistakes.
path_parts = parsed.path.strip("/").split("/")
if len(path_parts) >= 2:
Copy link

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path validation should be more strict. Currently it accepts paths with more than 2 parts (e.g., '/owner/repo/extra/parts'), which may not be valid GitHub repository URLs. Consider checking for exactly 2 parts or validating that additional parts are acceptable (like '/tree/branch').

Copilot uses AI. Check for mistakes.
owner = path_parts[0]
Copy link

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The owner and repository name should be validated to ensure they contain only valid characters. GitHub usernames and repository names have specific character restrictions (alphanumeric, hyphens, underscores, periods). Consider adding validation to prevent injection attacks.

Copilot uses AI. Check for mistakes.
repo_name = path_parts[1]
if repo_name.endswith('.git'):
repo_name = repo_name[:-4]

if github_token:
g = Github(github_token)
else:
g = Github() # Anonymous access for public repos

repo = g.get_repo(f"{owner}/{repo_name}")
return True, f"Repository access verified: {repo.full_name}"

return False, "Invalid GitHub repository URL"

except Exception as e:
return False, f"Repository access failed: {str(e)}"