Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# GitHub Personal Access Token
# Used for GitHub API operations: querying repository metadata,
# checking mirror existence, and creating mirror repos in your org.
#
# Required scopes:
# public_repo - for public repos (create public mirrors, read metadata)
# repo - for private repos (all of the above + private repo access)
GITHUB_TOKEN=

# Custom SSH key for private repo operations (optional)
# If not set, the system looks for default keys in ~/.ssh/
# (id_rsa, id_ecdsa, id_ed25519, etc.)
#
# Set this only if your GitHub SSH key is at a non-standard path.
# GITHUB_USER_SSH_KEY=/home/user/.ssh/id_ed25519_github
5 changes: 3 additions & 2 deletions swesmith/build_repo/try_install_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ def main(
base_cwd = os.getcwd()
try:
# Shallow clone repository at the specified commit
p._configure_ssh_env()
if not os.path.exists(p.repo):
subprocess.run(
f"git clone https://github.com/{p.owner}/{p.repo}.git",
f"git clone {p._source_read_url}",
check=True,
shell=True,
stdout=subprocess.DEVNULL,
Expand Down Expand Up @@ -205,7 +206,7 @@ def main(
"\n".join(
[
"#!/bin/bash\n",
f"git clone git@github.com:{p.owner}/{p.repo}.git",
f"git clone {p._source_read_url}",
f"git checkout {p.commit}",
]
+ install_lines
Expand Down
52 changes: 51 additions & 1 deletion swesmith/harness/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import docker
import fnmatch
import os
import threading
import traceback

from concurrent.futures import ThreadPoolExecutor, as_completed
Expand Down Expand Up @@ -35,6 +37,27 @@
from swesmith.profiles import registry
from unidiff import PatchSet

# man ssh_config(5)
_DEFAULT_SSH_KEYS = ["id_rsa", "id_ecdsa", "id_ecdsa_sk", "id_ed25519", "id_ed25519_sk"]


def _find_ssh_key() -> Path | None:
"""Find an SSH private key: explicit env var first, then default paths."""
key_path = os.getenv("GITHUB_USER_SSH_KEY")
if key_path and Path(key_path).exists():
return Path(key_path)

ssh_dir = Path.home() / ".ssh"
for key_name in _DEFAULT_SSH_KEYS:
key_file = ssh_dir / key_name
if key_file.exists():
return key_file

return None


# Module-level lock held while the SSH key file is copied into a container
# (see run_patch_in_container), so only one thread performs that copy at a time.
_ssh_copy_lock = threading.Lock()


def matches_instance_filter(instance_id: str, instance_ids: list[str] | None) -> bool:
"""
Expand Down Expand Up @@ -147,10 +170,37 @@ def run_patch_in_container(
)
container.start()

# For private repos, copy SSH key into container
ssh_env = {}
if rp._is_repo_private():
key_file = _find_ssh_key()
if key_file is None:
raise ValueError(
"Repo is private but no SSH key found. "
"Set GITHUB_USER_SSH_KEY or add a key to ~/.ssh/"
)

# Prevent race condition
with _ssh_copy_lock:
copy_to_container(container, key_file, Path("/github_key"))
container.exec_run("chmod 600 /github_key", user=DOCKER_USER)
ssh_env = {
"GIT_SSH_COMMAND": "ssh -i /github_key -o StrictHostKeyChecking=accept-new -o IdentitiesOnly=yes"
}

# If provided, checkout commit in container
if commit is not None:
logger.info(f"Checking out commit {commit}")
container.exec_run("git fetch", workdir=DOCKER_WORKDIR, user=DOCKER_USER)
fetch_val = container.exec_run(
"git fetch",
workdir=DOCKER_WORKDIR,
user=DOCKER_USER,
environment=ssh_env,
)
if fetch_val.exit_code != 0:
logger.info(
f"GIT FETCH FAILED (exit={fetch_val.exit_code}): {fetch_val.output.decode(UTF8)}"
)
val = container.exec_run(
f"git checkout {commit}", workdir=DOCKER_WORKDIR, user=DOCKER_USER
)
Expand Down
130 changes: 111 additions & 19 deletions swesmith/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
"""

import docker
import json
import os
import platform
import re
import shutil
import subprocess
import urllib.request

from abc import ABC, abstractmethod, ABCMeta
from collections import UserDict
Expand Down Expand Up @@ -106,6 +109,9 @@ def pltf(self) -> str:
_cache_test_paths = None
_cache_branches = None
_cache_mirror_exists = None
_cache_repo_private: bool | None = field(
default=None, init=False, repr=False, compare=False
)

### START: Properties, Methods that *do not* require (re-)implementation ###

Expand All @@ -117,6 +123,55 @@ def api(self) -> GhApi:
self._api = GhApi(token=token)
return self._api

def _is_repo_private(self) -> bool:
    """Return whether the source GitHub repository should be treated as private.

    Queries ``https://api.github.com/repos/{owner}/{repo}`` (authenticated
    with ``GITHUB_TOKEN`` when that variable is set) and reads the
    ``private`` field of the JSON response. The answer is cached on the
    instance in ``_cache_repo_private`` so the API is hit at most once.

    Returns:
        The ``private`` flag from the API response (``False`` when the field
        is absent), or ``True`` when the lookup fails for any reason.
    """
    if self._cache_repo_private is not None:
        return self._cache_repo_private
    try:
        url = f"https://api.github.com/repos/{self.owner}/{self.repo}"
        headers = {"User-Agent": "swesmith"}
        token = os.getenv("GITHUB_TOKEN")
        if token:
            headers["Authorization"] = f"token {token}"
        req = urllib.request.Request(url, headers=headers)
        # Bound the request: without a timeout an unresponsive network would
        # block every URL property that depends on this check indefinitely.
        with urllib.request.urlopen(req, timeout=30) as resp:
            data = json.loads(resp.read())
        self._cache_repo_private = data.get("private", False)
    except Exception:
        # Deliberate best effort: any failure (HTTP error, network error,
        # timeout, malformed response) is treated as "private".
        self._cache_repo_private = True
    return self._cache_repo_private

@staticmethod
def _configure_ssh_env():
    """Bake GIT_SSH_COMMAND into os.environ if GITHUB_USER_SSH_KEY is set.

    Does nothing when ``GITHUB_USER_SSH_KEY`` is unset/empty or when
    ``GIT_SSH_COMMAND`` is already present in the environment, so an
    explicit user setting is never overwritten.
    """
    # Local import keeps this block self-contained.
    import shlex

    key_path = os.getenv("GITHUB_USER_SSH_KEY")
    if key_path and "GIT_SSH_COMMAND" not in os.environ:
        # git runs GIT_SSH_COMMAND through the shell, so the key path must be
        # quoted or paths with spaces/metacharacters break. "~" is expanded
        # first because quoting would stop the shell from expanding it.
        quoted_key = shlex.quote(os.path.expanduser(key_path))
        os.environ["GIT_SSH_COMMAND"] = f"ssh -i {quoted_key} -o IdentitiesOnly=yes"

@property
def mirror_url(self) -> str:
    """URL used to read the mirror repository.

    An SSH URL when ``_is_repo_private()`` reports the repository as
    private, otherwise an HTTPS URL.
    """
    use_ssh = self._is_repo_private()
    return (
        f"git@github.com:{self.mirror_name}.git"
        if use_ssh
        else f"https://github.com/{self.mirror_name}"
    )

@property
def _mirror_ssh_url(self) -> str:
    """SSH URL of the mirror repository, regardless of repo visibility."""
    mirror = self.mirror_name
    return f"git@github.com:{mirror}.git"

@property
def _source_read_url(self) -> str:
    """URL used to read (clone) the original source repository.

    An SSH URL when ``_is_repo_private()`` reports the repository as
    private, otherwise an HTTPS URL.
    """
    slug = f"{self.owner}/{self.repo}"
    if not self._is_repo_private():
        return f"https://github.com/{slug}.git"
    return f"git@github.com:{slug}.git"

@property
def _docker_ssh_arg(self) -> str:
    """``--ssh`` argument to splice into the ``docker build`` command line.

    Returns:
        * ``--ssh default=<key>`` when ``GITHUB_USER_SSH_KEY`` is set,
        * ``--ssh default`` when it is unset but the repo is private,
        * an empty string otherwise.
    """
    # Local import keeps this block self-contained.
    import shlex

    key_path = os.getenv("GITHUB_USER_SSH_KEY")
    if key_path:
        # The result is interpolated into a shell command string, so the
        # value is quoted. "~" is expanded here because the shell does not
        # expand a tilde that follows "default=" inside an ordinary word.
        return "--ssh " + shlex.quote(f"default={os.path.expanduser(key_path)}")
    if self._is_repo_private():
        return "--ssh default"
    return ""

@property
def image_name(self) -> str:
    """Lower-cased Docker image name for this repository profile.

    Built from the Docker Hub org, architecture, ``owner``/``repo`` joined
    with ``_1776_``, and the first 8 characters of the commit.
    """
    short_commit = self.commit[:8]
    repo_part = f"{self.owner}_1776_{self.repo}"
    full_name = f"{self.org_dh}/swesmith.{self.arch}.{repo_part}.{short_commit}"
    return full_name.lower()
Expand Down Expand Up @@ -196,16 +251,52 @@ def _mirror_exists(self):
self._cache_mirror_exists = False
return self._cache_mirror_exists

def _prepare_dockerfile(self, content: str) -> str:
    """Inject BuildKit syntax directive and SSH mount into all RUN instructions.

    This ensures that SSH keys forwarded via `docker build --ssh` are
    transparently available to every RUN step (e.g. git clone, git
    submodule update) without requiring profile authors to remember
    `--mount=type=ssh` themselves. The mount uses `required=false` so
    builds still succeed when no SSH agent is forwarded.

    Args:
        content: Dockerfile text as produced by the profile.

    Returns:
        The Dockerfile text with (1) a `# syntax=docker/dockerfile:1`
        directive prepended unless one is already first, (2) an
        `ENV GIT_SSH_COMMAND=...` line inserted after the first FROM
        instruction only, and (3) `--mount=type=ssh,required=false` added to
        every RUN that does not already start with `--mount=type=ssh`.
    """
    if not content.lstrip().startswith("# syntax=docker/dockerfile"):
        content = "# syntax=docker/dockerfile:1\n" + content

    # Inject the GIT_SSH_COMMAND variable into the Dockerfile. With
    # StrictHostKeyChecking=accept-new, ssh accepts a previously unknown host
    # key and records it in ~/.ssh/known_hosts, so no interactive
    # confirmation is needed during the build.
    content = re.sub(
        r"^(FROM\s+.+)$",
        r'\1\nENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=accept-new"',
        content,
        count=1,
        flags=re.MULTILINE,
    )

    # The `\s` alternative in the lookahead stops the regex from backtracking
    # `\s+` by one character; without it, "RUN  --mount=type=ssh ..." (two
    # spaces) would still match and receive a second, duplicate ssh mount.
    content = re.sub(
        r"^RUN\s+(?!\s|--mount=type=ssh)",
        "RUN --mount=type=ssh,required=false ",
        content,
        flags=re.MULTILINE,
    )
    return content

def build_image(self):
"""Build a Docker image (execution environment) for this repository profile."""
env_dir = LOG_DIR_ENV / self.repo_name
env_dir.mkdir(parents=True, exist_ok=True)
dockerfile_path = env_dir / "Dockerfile"
with open(dockerfile_path, "w") as f:
f.write(self.dockerfile)
f.write(self._prepare_dockerfile(self.dockerfile))

build_cmd = (
f"docker build -f {dockerfile_path} --platform {self.pltf}"
f" --no-cache {self._docker_ssh_arg} -t {self.image_name} ."
)
with open(env_dir / "build_image.log", "w") as log_file:
subprocess.run(
f"docker build -f {dockerfile_path} --platform {self.pltf} --no-cache -t {self.image_name} .",
build_cmd,
check=True,
shell=True,
stdout=log_file,
Expand All @@ -218,11 +309,15 @@ def create_mirror(self):
return
if self.repo_name in os.listdir():
shutil.rmtree(self.repo_name)
self.api.repos.create_in_org(self.org_gh, self.repo_name)
source_repo = self.api.repos.get(self.owner, self.repo)
self.api.repos.create_in_org(
self.org_gh, self.repo_name, private=source_repo.private
)

# Clone the repository
# Clone the source repository (READ operation)
self._configure_ssh_env()
subprocess.run(
f"git clone git@github.com:{self.owner}/{self.repo}.git {self.repo_name}",
f"git clone {self._source_read_url} {self.repo_name}",
shell=True,
check=True,
stdout=subprocess.DEVNULL,
Expand All @@ -239,7 +334,7 @@ def create_mirror(self):
if os.path.exists(os.path.join(self.repo_name, ".gitmodules")):
git_cmds.append("git submodule update --init --recursive")

# Add the rest of the commands
# Add the rest of the commands (WRITE → always SSH)
git_cmds.extend(
[
"rm -rf .git",
Expand Down Expand Up @@ -282,26 +377,23 @@ def clone(self, dest: str | None = None) -> tuple[str, bool]:
)
dest = self.repo_name if not dest else dest
if not os.path.exists(dest):
token = os.getenv("GITHUB_TOKEN")
if token:
base_url = (
f"https://x-access-token:{token}@github.com/{self.mirror_name}.git"
)
else:
base_url = f"git@github.com:{self.mirror_name}.git"

clone_cmd = (
f"git clone {base_url}"
if dest is None
else f"git clone {base_url} {dest}"
)
self._configure_ssh_env()
clone_cmd = f"git clone {self.mirror_url} {dest}"
subprocess.run(
clone_cmd,
check=True,
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
# Always set SSH push URL (writes always use SSH)
subprocess.run(
f"git -C {dest} remote set-url --push origin {self._mirror_ssh_url}",
check=True,
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
return dest, True
else:
return dest, False
Expand Down
2 changes: 1 addition & 1 deletion swesmith/profiles/golang.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class GoProfile(RepoProfile):
@property
def dockerfile(self):
return f"""FROM golang:1.24
RUN git clone https://github.com/{self.mirror_name} /{ENV_NAME}
RUN git clone {self.mirror_url} /{ENV_NAME}
WORKDIR /{ENV_NAME}
RUN go mod tidy
RUN go test -v -count=1 ./... || true
Expand Down
Loading