@@ -92,7 +92,8 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
92
92
parsed = urlparse (repo_url )
93
93
# Determine the repository type and format the URL accordingly
94
94
if type == "github" :
95
- # Format: https://{token}@github.com/owner/repo.git
95
+ # Format: https://{token}@{domain}/owner/repo.git
96
+ # Works for both github.com and GitHub Enterprise domains
96
97
clone_url = urlunparse ((parsed .scheme , f"{ access_token } @{ parsed .netloc } " , parsed .path , '' , '' , '' ))
97
98
elif type == "gitlab" :
98
99
# Format: https://oauth2:{token}@gitlab.com/owner/repo.git
@@ -414,9 +415,11 @@ def transform_documents_and_save_to_db(
414
415
def get_github_file_content (repo_url : str , file_path : str , access_token : str = None ) -> str :
415
416
"""
416
417
Retrieves the content of a file from a GitHub repository using the GitHub API.
417
-
418
+ Supports both public GitHub (github.com) and GitHub Enterprise (custom domains).
419
+
418
420
Args:
419
- repo_url (str): The URL of the GitHub repository (e.g., "https://github.com/username/repo")
421
+ repo_url (str): The URL of the GitHub repository
422
+ (e.g., "https://github.com/username/repo" or "https://github.company.com/username/repo")
420
423
file_path (str): The path to the file within the repository (e.g., "src/main.py")
421
424
access_token (str, optional): GitHub personal access token for private repositories
422
425
@@ -427,20 +430,30 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N
427
430
ValueError: If the file cannot be fetched or if the URL is not a valid GitHub URL
428
431
"""
429
432
try :
430
- # Extract owner and repo name from GitHub URL
431
- if not (repo_url .startswith ("https://github.com/" ) or repo_url .startswith ("http://github.com/" )):
433
+ # Parse the repository URL to support both github.com and GitHub Enterprise
434
+ parsed_url = urlparse (repo_url )
435
+ if not parsed_url .scheme or not parsed_url .netloc :
432
436
raise ValueError ("Not a valid GitHub repository URL" )
433
437
434
- parts = repo_url .rstrip ('/' ).split ('/' )
435
- if len (parts ) < 5 :
436
- raise ValueError ("Invalid GitHub URL format" )
438
+ # Require at least two path segments: the owner and the repository name
439
+ path_parts = parsed_url .path .strip ('/' ).split ('/' )
440
+ if len (path_parts ) < 2 :
441
+ raise ValueError ("Invalid GitHub URL format - expected format: https://domain/owner/repo" )
437
442
438
- owner = parts [- 2 ]
439
- repo = parts [- 1 ].replace (".git" , "" )
443
+ owner = path_parts [- 2 ]
444
+ repo = path_parts [- 1 ].replace (".git" , "" )
440
445
446
+ # Determine the API base URL
447
+ if parsed_url .netloc == "github.com" :
448
+ # Public GitHub
449
+ api_base = "https://api.github.com"
450
+ else :
451
+ # GitHub Enterprise - API is typically at https://domain/api/v3/
452
+ api_base = f"{ parsed_url .scheme } ://{ parsed_url .netloc } /api/v3"
453
+
441
454
# Use GitHub API to get file content
442
455
# The API endpoint for getting file content is: /repos/{owner}/{repo}/contents/{path}
443
- api_url = f"https://api.github.com /repos/{ owner } /{ repo } /contents/{ file_path } "
456
+ api_url = f"{ api_base } /repos/{ owner } /{ repo } /contents/{ file_path } "
444
457
445
458
# Fetch file content from GitHub API
446
459
headers = {}
0 commit comments