Skip to content

Commit ca14883

Browse files
authored
chore: removing calls to curl, replacing with requests parity (#179)
* removing calls to curl, replacing with requests parity
* removing useless check
1 parent ab83620 commit ca14883

File tree

1 file changed

+40
-64
lines changed

1 file changed

+40
-64
lines changed

api/data_pipeline.py

Lines changed: 40 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
from api.config import configs, DEFAULT_EXCLUDED_DIRS, DEFAULT_EXCLUDED_FILES
1515
from api.ollama_patch import OllamaDocumentProcessor
1616
from urllib.parse import urlparse, urlunparse, quote
17+
import requests
18+
from requests.exceptions import RequestException
1719

1820
from api.tools.embedder import get_embedder
1921

@@ -440,21 +442,20 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N
440442
# The API endpoint for getting file content is: /repos/{owner}/{repo}/contents/{path}
441443
api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
442444

443-
# Prepare curl command with authentication if token is provided
444-
curl_cmd = ["curl", "-s"]
445+
# Fetch file content from GitHub API
446+
headers = {}
445447
if access_token:
446-
curl_cmd.extend(["-H", f"Authorization: token {access_token}"])
447-
curl_cmd.append(api_url)
448-
448+
headers["Authorization"] = f"token {access_token}"
449449
logger.info(f"Fetching file content from GitHub API: {api_url}")
450-
result = subprocess.run(
451-
curl_cmd,
452-
check=True,
453-
stdout=subprocess.PIPE,
454-
stderr=subprocess.PIPE,
455-
)
456-
457-
content_data = json.loads(result.stdout.decode("utf-8"))
450+
try:
451+
response = requests.get(api_url, headers=headers)
452+
response.raise_for_status()
453+
except RequestException as e:
454+
raise ValueError(f"Error fetching file content: {e}")
455+
try:
456+
content_data = response.json()
457+
except json.JSONDecodeError:
458+
raise ValueError("Invalid response from GitHub API")
458459

459460
# Check if we got an error response
460461
if "message" in content_data and "documentation_url" in content_data:
@@ -472,14 +473,6 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N
472473
else:
473474
raise ValueError("File content not found in GitHub API response")
474475

475-
except subprocess.CalledProcessError as e:
476-
error_msg = e.stderr.decode('utf-8')
477-
# Sanitize error message to remove any tokens
478-
if access_token and access_token in error_msg:
479-
error_msg = error_msg.replace(access_token, "***TOKEN***")
480-
raise ValueError(f"Error fetching file content: {error_msg}")
481-
except json.JSONDecodeError:
482-
raise ValueError("Invalid response from GitHub API")
483476
except Exception as e:
484477
raise ValueError(f"Failed to get file content: {str(e)}")
485478

@@ -522,20 +515,17 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N
522515
default_branch = 'main'
523516

524517
api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={default_branch}"
525-
curl_cmd = ["curl", "-s"]
518+
# Fetch file content from GitLab API
519+
headers = {}
526520
if access_token:
527-
curl_cmd.extend(["-H", f"PRIVATE-TOKEN: {access_token}"])
528-
curl_cmd.append(api_url)
529-
521+
headers["PRIVATE-TOKEN"] = access_token
530522
logger.info(f"Fetching file content from GitLab API: {api_url}")
531-
result = subprocess.run(
532-
curl_cmd,
533-
check=True,
534-
stdout=subprocess.PIPE,
535-
stderr=subprocess.PIPE,
536-
)
537-
538-
content = result.stdout.decode("utf-8")
523+
try:
524+
response = requests.get(api_url, headers=headers)
525+
response.raise_for_status()
526+
content = response.text
527+
except RequestException as e:
528+
raise ValueError(f"Error fetching file content: {e}")
539529

540530
# Check for GitLab error response (JSON instead of raw file)
541531
if content.startswith("{") and '"message":' in content:
@@ -544,17 +534,10 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N
544534
if "message" in error_data:
545535
raise ValueError(f"GitLab API error: {error_data['message']}")
546536
except json.JSONDecodeError:
547-
# If it's not valid JSON, it's probably the file content
548537
pass
549538

550539
return content
551540

552-
except subprocess.CalledProcessError as e:
553-
error_msg = e.stderr.decode('utf-8')
554-
# Sanitize error message to remove any tokens
555-
if access_token and access_token in error_msg:
556-
error_msg = error_msg.replace(access_token, "***TOKEN***")
557-
raise ValueError(f"Error fetching file content: {error_msg}")
558541
except Exception as e:
559542
raise ValueError(f"Failed to get file content: {str(e)}")
560543

@@ -586,37 +569,30 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
586569
# The API endpoint for getting file content is: /2.0/repositories/{owner}/{repo}/src/{branch}/{path}
587570
api_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}/src/main/{file_path}"
588571

589-
# Prepare curl command with authentication if token is provided
590-
curl_cmd = ["curl", "-s"]
572+
# Fetch file content from Bitbucket API
573+
headers = {}
591574
if access_token:
592-
curl_cmd.extend(["-H", f"Authorization: Bearer {access_token}"])
593-
curl_cmd.append(api_url)
594-
575+
headers["Authorization"] = f"Bearer {access_token}"
595576
logger.info(f"Fetching file content from Bitbucket API: {api_url}")
596-
result = subprocess.run(
597-
curl_cmd,
598-
check=True,
599-
stdout=subprocess.PIPE,
600-
stderr=subprocess.PIPE,
601-
)
602-
603-
# Bitbucket API returns the raw file content directly
604-
content = result.stdout.decode("utf-8")
605-
return content
606-
607-
except subprocess.CalledProcessError as e:
608-
error_msg = e.stderr.decode('utf-8')
609-
if e.returncode == 22: # curl uses 22 to indicate an HTTP error occurred
610-
if "HTTP/1.1 404" in error_msg:
577+
try:
578+
response = requests.get(api_url, headers=headers)
579+
if response.status_code == 200:
580+
content = response.text
581+
elif response.status_code == 404:
611582
raise ValueError("File not found on Bitbucket. Please check the file path and repository.")
612-
elif "HTTP/1.1 401" in error_msg:
583+
elif response.status_code == 401:
613584
raise ValueError("Unauthorized access to Bitbucket. Please check your access token.")
614-
elif "HTTP/1.1 403" in error_msg:
585+
elif response.status_code == 403:
615586
raise ValueError("Forbidden access to Bitbucket. You might not have permission to access this file.")
616-
elif "HTTP/1.1 500" in error_msg:
587+
elif response.status_code == 500:
617588
raise ValueError("Internal server error on Bitbucket. Please try again later.")
618589
else:
619-
raise ValueError(f"Error fetching file content: {error_msg}")
590+
response.raise_for_status()
591+
content = response.text
592+
return content
593+
except RequestException as e:
594+
raise ValueError(f"Error fetching file content: {e}")
595+
620596
except Exception as e:
621597
raise ValueError(f"Failed to get file content: {str(e)}")
622598

0 commit comments

Comments
 (0)