Skip to content

Commit c7d488b

Browse files
feat: update github api with Enterprise endpoint support (#239)
Co-authored-by: carbarcha <[email protected]>
1 parent 7ed5249 commit c7d488b

File tree

2 files changed

+51
-13
lines changed

2 files changed

+51
-13
lines changed

api/data_pipeline.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
9292
parsed = urlparse(repo_url)
9393
# Determine the repository type and format the URL accordingly
9494
if type == "github":
95-
# Format: https://{token}@github.com/owner/repo.git
95+
# Format: https://{token}@{domain}/owner/repo.git
96+
# Works for both github.com and enterprise GitHub domains
9697
clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
9798
elif type == "gitlab":
9899
# Format: https://oauth2:{token}@gitlab.com/owner/repo.git
@@ -414,9 +415,11 @@ def transform_documents_and_save_to_db(
414415
def get_github_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
415416
"""
416417
Retrieves the content of a file from a GitHub repository using the GitHub API.
417-
418+
Supports both public GitHub (github.com) and GitHub Enterprise (custom domains).
419+
418420
Args:
419-
repo_url (str): The URL of the GitHub repository (e.g., "https://github.com/username/repo")
421+
repo_url (str): The URL of the GitHub repository
422+
(e.g., "https://github.com/username/repo" or "https://github.company.com/username/repo")
420423
file_path (str): The path to the file within the repository (e.g., "src/main.py")
421424
access_token (str, optional): GitHub personal access token for private repositories
422425
@@ -427,20 +430,30 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N
427430
ValueError: If the file cannot be fetched or if the URL is not a valid GitHub URL
428431
"""
429432
try:
430-
# Extract owner and repo name from GitHub URL
431-
if not (repo_url.startswith("https://github.com/") or repo_url.startswith("http://github.com/")):
433+
# Parse the repository URL to support both github.com and enterprise GitHub
434+
parsed_url = urlparse(repo_url)
435+
if not parsed_url.scheme or not parsed_url.netloc:
432436
raise ValueError("Not a valid GitHub repository URL")
433437

434-
parts = repo_url.rstrip('/').split('/')
435-
if len(parts) < 5:
436-
raise ValueError("Invalid GitHub URL format")
438+
# Check if it's a GitHub-like URL structure
439+
path_parts = parsed_url.path.strip('/').split('/')
440+
if len(path_parts) < 2:
441+
raise ValueError("Invalid GitHub URL format - expected format: https://domain/owner/repo")
437442

438-
owner = parts[-2]
439-
repo = parts[-1].replace(".git", "")
443+
owner = path_parts[-2]
444+
repo = path_parts[-1].replace(".git", "")
440445

446+
# Determine the API base URL
447+
if parsed_url.netloc == "github.com":
448+
# Public GitHub
449+
api_base = "https://api.github.com"
450+
else:
451+
# GitHub Enterprise - API is typically at https://domain/api/v3/
452+
api_base = f"{parsed_url.scheme}://{parsed_url.netloc}/api/v3"
453+
441454
# Use GitHub API to get file content
442455
# The API endpoint for getting file content is: /repos/{owner}/{repo}/contents/{path}
443-
api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
456+
api_url = f"{api_base}/repos/{owner}/{repo}/contents/{file_path}"
444457

445458
# Fetch file content from GitHub API
446459
headers = {}

src/app/[owner]/[repo]/page.tsx

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,8 +1120,33 @@ IMPORTANT:
11201120
let treeData = null;
11211121
let apiErrorDetails = '';
11221122

1123+
// Determine the GitHub API base URL based on the repository URL
1124+
/**
 * Resolves the GitHub REST API base URL for a repository URL.
 *
 * - Public GitHub (`github.com` / `www.github.com`) maps to `https://api.github.com`.
 * - Any other host is treated as GitHub Enterprise, whose REST API lives under
 *   `/api/v3` on the same origin (scheme, host and port) as the web UI.
 * - A missing, empty or unparsable URL falls back to the public GitHub API.
 *
 * @param repoUrl Full repository URL, or `null` when none is known.
 * @returns The API base URL without a trailing slash.
 */
const getGithubApiUrl = (repoUrl: string | null): string => {
  const publicGithubApi = 'https://api.github.com';

  if (!repoUrl) {
    return publicGithubApi; // Default to public GitHub
  }

  try {
    const url = new URL(repoUrl);

    // `hostname` is already lower-cased by the URL parser and never carries a port,
    // so it is the right field for identifying public GitHub.
    if (url.hostname === 'github.com' || url.hostname === 'www.github.com') {
      return publicGithubApi;
    }

    // For GitHub Enterprise, use the enterprise API URL format:
    //   https://github.company.com/api/v3
    // Use `host` (not `hostname`) so an explicit non-default port such as
    // `ghe.company.com:8443` is preserved in the API base URL.
    return `${url.protocol}//${url.host}/api/v3`;
  } catch {
    return publicGithubApi; // Fallback to public GitHub if URL parsing fails
  }
};
1145+
1146+
const githubApiBaseUrl = getGithubApiUrl(effectiveRepoInfo.repoUrl);
1147+
11231148
for (const branch of ['main', 'master']) {
1124-
const apiUrl = `https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`;
1149+
const apiUrl = `${githubApiBaseUrl}/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`;
11251150
const headers = createGithubHeaders(currentToken);
11261151

11271152
console.log(`Fetching repository structure from branch: ${branch}`);
@@ -1162,7 +1187,7 @@ IMPORTANT:
11621187
try {
11631188
const headers = createGithubHeaders(currentToken);
11641189

1165-
const readmeResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/readme`, {
1190+
const readmeResponse = await fetch(`${githubApiBaseUrl}/repos/${owner}/${repo}/readme`, {
11661191
headers
11671192
});
11681193

0 commit comments

Comments
 (0)