diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..77ac0d70 --- /dev/null +++ b/.env.example @@ -0,0 +1,15 @@ +# GitHub Personal Access Token +# Used for GitHub API operations: querying repository metadata, +# checking mirror existence, and creating mirror repos in your org. +# +# Required scopes: +# public_repo - for public repos (create public mirrors, read metadata) +# repo - for private repos (all of the above + private repo access) +GITHUB_TOKEN= + +# Custom SSH key for private repo operations (optional) +# If not set, the system looks for default keys in ~/.ssh/ +# (id_rsa, id_ecdsa, id_ed25519, etc.) +# +# Set this only if your GitHub SSH key is at a non-standard path. +# GITHUB_USER_SSH_KEY=/home/user/.ssh/id_ed25519_github diff --git a/swesmith/build_repo/try_install_py.py b/swesmith/build_repo/try_install_py.py index 19fdc355..5684c0e5 100644 --- a/swesmith/build_repo/try_install_py.py +++ b/swesmith/build_repo/try_install_py.py @@ -113,9 +113,10 @@ def main( base_cwd = os.getcwd() try: # Shallow clone repository at the specified commit + p._configure_ssh_env() if not os.path.exists(p.repo): subprocess.run( - f"git clone https://github.com/{p.owner}/{p.repo}.git", + f"git clone {p._source_read_url}", check=True, shell=True, stdout=subprocess.DEVNULL, @@ -205,7 +206,7 @@ def main( "\n".join( [ "#!/bin/bash\n", - f"git clone git@github.com:{p.owner}/{p.repo}.git", + f"git clone {p._source_read_url}", f"git checkout {p.commit}", ] + install_lines diff --git a/swesmith/harness/utils.py b/swesmith/harness/utils.py index 47af3781..1c77c43f 100644 --- a/swesmith/harness/utils.py +++ b/swesmith/harness/utils.py @@ -1,5 +1,6 @@ import docker import fnmatch +import threading import traceback from concurrent.futures import ThreadPoolExecutor, as_completed @@ -33,9 +34,13 @@ TEST_OUTPUT_START, ) from swesmith.profiles import registry +from swesmith.profiles.base import _find_ssh_key from unidiff import PatchSet +_ssh_copy_lock = 
threading.Lock() + + def matches_instance_filter(instance_id: str, instance_ids: list[str] | None) -> bool: """ Check if an instance_id matches the filtering criteria. @@ -147,10 +152,37 @@ def run_patch_in_container( ) container.start() + # For private repos, copy SSH key into container + ssh_env = {} + if rp._is_repo_private(): + key_file = _find_ssh_key() + if key_file is None: + raise ValueError( + "Repo is private but no SSH key found. " + "Set GITHUB_USER_SSH_KEY or add a key to ~/.ssh/" + ) + + # Prevent race condition + with _ssh_copy_lock: + copy_to_container(container, key_file, Path("/github_key")) + container.exec_run("chmod 600 /github_key", user=DOCKER_USER) + ssh_env = { + "GIT_SSH_COMMAND": "ssh -i /github_key -o StrictHostKeyChecking=accept-new -o IdentitiesOnly=yes" + } + # If provided, checkout commit in container if commit is not None: logger.info(f"Checking out commit {commit}") - container.exec_run("git fetch", workdir=DOCKER_WORKDIR, user=DOCKER_USER) + fetch_val = container.exec_run( + "git fetch", + workdir=DOCKER_WORKDIR, + user=DOCKER_USER, + environment=ssh_env, + ) + if fetch_val.exit_code != 0: + logger.info( + f"GIT FETCH FAILED (exit={fetch_val.exit_code}): {fetch_val.output.decode(UTF8)}" + ) val = container.exec_run( f"git checkout {commit}", workdir=DOCKER_WORKDIR, user=DOCKER_USER ) diff --git a/swesmith/profiles/base.py b/swesmith/profiles/base.py index 6780c282..9eb2f785 100644 --- a/swesmith/profiles/base.py +++ b/swesmith/profiles/base.py @@ -6,10 +6,15 @@ """ import docker +import json +import logging import os import platform +import re import shutil import subprocess +import urllib.error +import urllib.request from abc import ABC, abstractmethod, ABCMeta from collections import UserDict @@ -42,6 +47,25 @@ load_dotenv() +logger = logging.getLogger(__name__) + +_DEFAULT_SSH_KEYS = ["id_rsa", "id_ecdsa", "id_ecdsa_sk", "id_ed25519", "id_ed25519_sk"] + + +def _find_ssh_key() -> Path | None: + """Find an SSH private key: 
explicit env var first, then default paths.""" + key_path = os.getenv("GITHUB_USER_SSH_KEY") + if key_path and Path(key_path).exists(): + return Path(key_path) + + ssh_dir = Path.home() / ".ssh" + for key_name in _DEFAULT_SSH_KEYS: + key_file = ssh_dir / key_name + if key_file.exists(): + return key_file + + return None + class SingletonMeta(ABCMeta): _instances = {} @@ -106,6 +130,9 @@ def pltf(self) -> str: _cache_test_paths = None _cache_branches = None _cache_mirror_exists = None + _cache_repo_private: bool | None = field( + default=None, init=False, repr=False, compare=False + ) ### START: Properties, Methods that *do not* require (re-)implementation ### @@ -117,6 +144,63 @@ def api(self) -> GhApi: self._api = GhApi(token=token) return self._api + def _is_repo_private(self) -> bool: + if self._cache_repo_private is not None: + return self._cache_repo_private + try: + url = f"https://api.github.com/repos/{self.owner}/{self.repo}" + headers = {"User-Agent": "swesmith"} + token = os.getenv("GITHUB_TOKEN") + if token: + headers["Authorization"] = f"token {token}" + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req) as resp: + data = json.loads(resp.read()) + self._cache_repo_private = data.get("private", False) + except urllib.error.HTTPError as e: + if e.code == 404: + logger.warning( + "Repo '%s/%s' returned 404 — assuming private", + self.owner, + self.repo, + ) + self._cache_repo_private = True + else: + raise + return self._cache_repo_private + + @staticmethod + def _configure_ssh_env(): + """Bake GIT_SSH_COMMAND into os.environ if GITHUB_USER_SSH_KEY is set.""" + key_path = os.getenv("GITHUB_USER_SSH_KEY") + if key_path and "GIT_SSH_COMMAND" not in os.environ: + os.environ["GIT_SSH_COMMAND"] = f"ssh -i {key_path} -o IdentitiesOnly=yes" + + @property + def mirror_url(self) -> str: + if self._is_repo_private(): + return f"git@github.com:{self.mirror_name}.git" + return f"https://github.com/{self.mirror_name}" + + @property + 
def _prepare_dockerfile(self, content: str) -> str:
    """Inject the BuildKit syntax directive, SSH env and SSH mounts.

    Three rewrites are applied to the Dockerfile text, each of which is a
    no-op when its result is already present, so calling this twice yields
    the same output as calling it once:

    1. Prepend ``# syntax=docker/dockerfile:1`` unless the content already
       starts with a syntax directive.
    2. After every ``FROM`` line, add an ``ENV GIT_SSH_COMMAND`` line that
       makes ssh accept an unknown host key and record it in
       ``~/.ssh/known_hosts``, so no interactive prompt is needed. ``ENV``
       values do not carry over into a stage started from a different base
       image, hence every stage gets its own line.
    3. Add ``--mount=type=ssh,required=false`` to every ``RUN`` instruction
       that does not already carry an ssh mount, so keys forwarded with
       ``docker build --ssh`` are available to each step while builds
       without a forwarded agent still succeed.

    Args:
        content: Raw Dockerfile text.

    Returns:
        The rewritten Dockerfile text.
    """
    if not content.lstrip().startswith("# syntax=docker/dockerfile"):
        content = "# syntax=docker/dockerfile:1\n" + content

    # The lookahead skips FROM lines that are already followed by the
    # injected ENV line, which keeps the rewrite idempotent.
    content = re.sub(
        r"^(FROM[ \t]+[^\n]+)$(?!\nENV GIT_SSH_COMMAND=)",
        r'\1\nENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=accept-new"',
        content,
        flags=re.MULTILINE,
    )

    # Scan the rest of the RUN line for an existing ssh mount. Anchoring the
    # check directly after the whitespace would let the regex backtrack past
    # extra spaces ("RUN  --mount=type=ssh") and miss mounts that follow
    # another flag, producing a duplicate mount.
    content = re.sub(
        r"^RUN[ \t]+(?![^\n]*--mount=type=ssh)",
        "RUN --mount=type=ssh,required=false ",
        content,
        flags=re.MULTILINE,
    )
    return content
| None = None) -> tuple[str, bool]: ) dest = self.repo_name if not dest else dest if not os.path.exists(dest): - token = os.getenv("GITHUB_TOKEN") - if token: - base_url = ( - f"https://x-access-token:{token}@github.com/{self.mirror_name}.git" - ) - else: - base_url = f"git@github.com:{self.mirror_name}.git" - - clone_cmd = ( - f"git clone {base_url}" - if dest is None - else f"git clone {base_url} {dest}" - ) + self._configure_ssh_env() + clone_cmd = f"git clone {self.mirror_url} {dest}" subprocess.run( clone_cmd, check=True, @@ -302,6 +415,14 @@ def clone(self, dest: str | None = None) -> tuple[str, bool]: stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) + # Always set SSH push URL (writes always use SSH) + subprocess.run( + f"git -C {dest} remote set-url --push origin {self._mirror_ssh_url}", + check=True, + shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) return dest, True else: return dest, False diff --git a/swesmith/profiles/golang.py b/swesmith/profiles/golang.py index 01ab83c8..77d44601 100644 --- a/swesmith/profiles/golang.py +++ b/swesmith/profiles/golang.py @@ -31,7 +31,7 @@ class GoProfile(RepoProfile): @property def dockerfile(self): return f"""FROM golang:1.24 -RUN git clone https://github.com/{self.mirror_name} /{ENV_NAME} +RUN git clone {self.mirror_url} /{ENV_NAME} WORKDIR /{ENV_NAME} RUN go mod tidy RUN go test -v -count=1 ./... 
|| true diff --git a/swesmith/profiles/javascript.py b/swesmith/profiles/javascript.py index 0a27ddb1..c5be97ec 100644 --- a/swesmith/profiles/javascript.py +++ b/swesmith/profiles/javascript.py @@ -51,10 +51,10 @@ def extract_entities( ) -def default_npm_install_dockerfile(mirror_name: str, node_version: str = "18") -> str: +def default_npm_install_dockerfile(mirror_url: str, node_version: str = "18") -> str: return f"""FROM node:{node_version}-bullseye -RUN apt update && apt install -y git -RUN git clone https://github.com/{mirror_name} /{ENV_NAME} +RUN apt update && apt install -y git +RUN git clone {mirror_url} /{ENV_NAME} WORKDIR /{ENV_NAME} RUN npm install """ @@ -331,7 +331,7 @@ class GithubReadmeStats3e974011(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_jest(log) @@ -346,7 +346,7 @@ class Mongoose5f57a5bb(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -364,7 +364,7 @@ class Axiosef36347f(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -379,7 +379,7 @@ class Async23dbf76a(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -394,7 +394,7 @@ class Expressef5f2e13(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return 
default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -409,7 +409,7 @@ class Dayjsc8a26460(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name) + return default_npm_install_dockerfile(self.mirror_url) def log_parser(self, log: str) -> dict[str, str]: return parse_log_jest(log) @@ -447,7 +447,7 @@ class Commanderjs395cf714(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="20") + return default_npm_install_dockerfile(self.mirror_url, node_version="20") def log_parser(self, log: str) -> dict[str, str]: return parse_log_jest(log) @@ -577,7 +577,7 @@ def image_name(self) -> str: @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="22") + return default_npm_install_dockerfile(self.mirror_url, node_version="22") def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -592,7 +592,7 @@ class Qd180f4a0(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="22") + return default_npm_install_dockerfile(self.mirror_url, node_version="22") def log_parser(self, log: str) -> dict[str, str]: return parse_log_mocha(log) @@ -610,7 +610,7 @@ class ImmutableJS879adab5(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="22") + return default_npm_install_dockerfile(self.mirror_url, node_version="22") def log_parser(self, log: str) -> dict[str, str]: return parse_log_jest(log) @@ -628,7 +628,7 @@ class ThreeJS73b3f248(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="22") + return default_npm_install_dockerfile(self.mirror_url, node_version="22") def log_parser(self, log: str) -> dict[str, str]: 
return parse_log_jest(log) @@ -643,7 +643,7 @@ class Echarts6be0e145(JavaScriptProfile): @property def dockerfile(self): - return default_npm_install_dockerfile(self.mirror_name, node_version="22") + return default_npm_install_dockerfile(self.mirror_url, node_version="22") def log_parser(self, log: str) -> dict[str, str]: return parse_log_jest(log) diff --git a/swesmith/profiles/python.py b/swesmith/profiles/python.py index 3faa9b95..e1d9db17 100644 --- a/swesmith/profiles/python.py +++ b/swesmith/profiles/python.py @@ -1,7 +1,7 @@ -import docker import re -from dataclasses import dataclass, field +import subprocess +from dataclasses import dataclass, field from pathlib import Path from swebench.harness.constants import ( FAIL_TO_PASS, @@ -9,7 +9,6 @@ KEY_INSTANCE_ID, TestStatus, ) -from swebench.harness.docker_build import build_image as build_image_sweb from swebench.harness.dockerfiles import get_dockerfile_env from swesmith.constants import LOG_DIR_ENV, ENV_NAME, INSTANCE_REF, ORG_NAME_DH from swesmith.profiles.base import RepoProfile, registry @@ -49,14 +48,13 @@ def build_image(self): HEREDOC_DELIMITER = "EOF_59812759871" PATH_TO_REQS = "swesmith_environment.yml" - client = docker.from_env() with open(self._env_yml) as f: reqs = f.read() setup_commands = [ "#!/bin/bash", "set -euxo pipefail", - f"git clone -o origin https://github.com/{self.mirror_name} /{ENV_NAME}", + f"git clone -o origin {self.mirror_url} /{ENV_NAME}", f"cd /{ENV_NAME}", "source /opt/miniconda3/bin/activate", f"cat <<'{HEREDOC_DELIMITER}' > {PATH_TO_REQS}\n{reqs}\n{HEREDOC_DELIMITER}", @@ -66,18 +64,31 @@ def build_image(self): f"conda activate {ENV_NAME}", 'echo "Current environment: $CONDA_DEFAULT_ENV"', ] + self.install_cmds + dockerfile = get_dockerfile_env( self.pltf, self.arch, "py", base_image_key=BASE_IMAGE_KEY ) - - build_image_sweb( - image_name=self.image_name, - setup_scripts={"setup_env.sh": "\n".join(setup_commands) + "\n"}, - dockerfile=dockerfile, - platform=self.pltf, - 
client=client, - build_dir=LOG_DIR_ENV / self.repo_name, + dockerfile = self._prepare_dockerfile(dockerfile) + + env_dir = LOG_DIR_ENV / self.repo_name + env_dir.mkdir(parents=True, exist_ok=True) + with open(env_dir / "setup_env.sh", "w") as f: + f.write("\n".join(setup_commands) + "\n") + with open(env_dir / "Dockerfile", "w") as f: + f.write(dockerfile) + + build_cmd = ( + f"docker build --platform {self.pltf} --no-cache" + f" {self._docker_ssh_arg} -t {self.image_name} {env_dir}" ) + with open(env_dir / "build_image.log", "w") as log_file: + subprocess.run( + build_cmd, + check=True, + shell=True, + stdout=log_file, + stderr=subprocess.STDOUT, + ) def log_parser(self, log: str) -> dict[str, str]: """Parser for test logs generated with PyTest framework""" diff --git a/swesmith/profiles/rust.py b/swesmith/profiles/rust.py index df3fdf16..6922c74e 100644 --- a/swesmith/profiles/rust.py +++ b/swesmith/profiles/rust.py @@ -36,7 +36,7 @@ def dockerfile(self): RUN apt update && apt install -y wget git build-essential \ && rm -rf /var/lib/apt/lists/* -RUN git clone https://github.com/{self.mirror_name} /{ENV_NAME} +RUN git clone {self.mirror_url} /{ENV_NAME} WORKDIR /{ENV_NAME} RUN {self.test_cmd} || true """ diff --git a/swesmith/profiles/typescript.py b/swesmith/profiles/typescript.py index 3f6469d7..3968a28b 100644 --- a/swesmith/profiles/typescript.py +++ b/swesmith/profiles/typescript.py @@ -50,22 +50,22 @@ def extract_entities( ) -def default_npm_install_dockerfile(mirror_name: str, node_version: str = "20") -> str: +def default_npm_install_dockerfile(mirror_url: str, node_version: str = "20") -> str: """Default Dockerfile for TypeScript projects using npm.""" return f"""FROM node:{node_version}-bullseye RUN apt update && apt install -y git -RUN git clone https://github.com/{mirror_name} /{ENV_NAME} +RUN git clone {mirror_url} /{ENV_NAME} WORKDIR /{ENV_NAME} RUN npm install """ -def default_pnpm_install_dockerfile(mirror_name: str, node_version: str = "20") -> 
str: +def default_pnpm_install_dockerfile(mirror_url: str, node_version: str = "20") -> str: """Default Dockerfile for TypeScript projects using pnpm.""" return f"""FROM node:{node_version}-bullseye RUN apt update && apt install -y git RUN npm install -g pnpm -RUN git clone https://github.com/{mirror_name} /{ENV_NAME} +RUN git clone {mirror_url} /{ENV_NAME} WORKDIR /{ENV_NAME} RUN pnpm install """ diff --git a/tests/harness/test_utils_ssh.py b/tests/harness/test_utils_ssh.py new file mode 100644 index 00000000..aabb1fa6 --- /dev/null +++ b/tests/harness/test_utils_ssh.py @@ -0,0 +1,295 @@ +"""Tests for SSH/private-repo support in swesmith.harness.utils.""" + +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from swesmith.profiles.base import _find_ssh_key + + +class TestFindSshKey: + """Tests for the _find_ssh_key helper.""" + + def test_returns_env_var_path_when_exists(self, tmp_path): + key_file = tmp_path / "my_key" + key_file.write_text("fake key") + with patch.dict(os.environ, {"GITHUB_USER_SSH_KEY": str(key_file)}): + result = _find_ssh_key() + assert result == Path(key_file) + + def test_ignores_env_var_when_file_missing(self): + with patch.dict(os.environ, {"GITHUB_USER_SSH_KEY": "/nonexistent/key"}): + with patch("pathlib.Path.exists", return_value=False): + result = _find_ssh_key() + assert result is None + + def test_falls_back_to_default_ssh_keys(self, tmp_path): + ssh_dir = tmp_path / ".ssh" + ssh_dir.mkdir() + ed25519_key = ssh_dir / "id_ed25519" + ed25519_key.write_text("fake key") + + with ( + patch.dict(os.environ, {}, clear=False), + patch("pathlib.Path.home", return_value=tmp_path), + ): + os.environ.pop("GITHUB_USER_SSH_KEY", None) + result = _find_ssh_key() + assert result == ed25519_key + + def test_returns_first_matching_default_key(self, tmp_path): + ssh_dir = tmp_path / ".ssh" + ssh_dir.mkdir() + rsa_key = ssh_dir / "id_rsa" + rsa_key.write_text("fake rsa key") + ed25519_key = ssh_dir / 
"id_ed25519" + ed25519_key.write_text("fake ed25519 key") + + with ( + patch.dict(os.environ, {}, clear=False), + patch("pathlib.Path.home", return_value=tmp_path), + ): + os.environ.pop("GITHUB_USER_SSH_KEY", None) + result = _find_ssh_key() + assert result == rsa_key + + def test_returns_none_when_no_keys_exist(self, tmp_path): + ssh_dir = tmp_path / ".ssh" + ssh_dir.mkdir() + + with ( + patch.dict(os.environ, {}, clear=False), + patch("pathlib.Path.home", return_value=tmp_path), + ): + os.environ.pop("GITHUB_USER_SSH_KEY", None) + result = _find_ssh_key() + assert result is None + + +class TestRunPatchInContainerSsh: + """Tests for the private-repo SSH logic inside run_patch_in_container.""" + + @pytest.fixture + def mock_env(self, tmp_path): + """Set up the common mocks needed for run_patch_in_container.""" + mock_container = MagicMock() + mock_container.exec_run.return_value = MagicMock(exit_code=0, output=b"ok") + + mock_client = MagicMock() + mock_client.containers.create.return_value = mock_container + + mock_profile = MagicMock() + mock_profile.image_name = "test-image" + mock_profile._is_repo_private.return_value = False + + mock_logger = MagicMock() + mock_logger.log_file = str(tmp_path / "test.log") + + return { + "container": mock_container, + "client": mock_client, + "profile": mock_profile, + "logger": mock_logger, + "log_dir": tmp_path, + } + + def test_private_repo_copies_ssh_key(self, mock_env, tmp_path): + """When repo is private, SSH key should be copied into container.""" + mock_env["profile"]._is_repo_private.return_value = True + + ssh_key = tmp_path / "id_rsa" + ssh_key.write_text("fake key") + + with ( + patch("swesmith.harness.utils.docker") as mock_docker, + patch("swesmith.harness.utils.registry") as mock_registry, + patch( + "swesmith.harness.utils.setup_logger", return_value=mock_env["logger"] + ), + patch("swesmith.harness.utils._find_ssh_key", return_value=ssh_key), + patch("swesmith.harness.utils.copy_to_container") as mock_copy, + 
patch( + "swesmith.harness.utils.exec_run_with_timeout", + return_value=("output", False, 1.0), + ), + patch("swesmith.harness.utils.cleanup_container"), + ): + mock_docker.from_env.return_value = mock_env["client"] + mock_registry.get_from_inst.return_value = mock_env["profile"] + + from swesmith.harness.utils import run_patch_in_container + + instance = {"instance_id": "test_instance"} + run_patch_in_container( + instance=instance, + run_id="run1", + log_dir=tmp_path, + timeout=60, + ) + + mock_copy.assert_called_once_with( + mock_env["container"], ssh_key, Path("/github_key") + ) + mock_env["container"].exec_run.assert_any_call( + "chmod 600 /github_key", user="root" + ) + + def test_private_repo_no_key_raises(self, mock_env, tmp_path): + """When repo is private and no SSH key found, should raise ValueError.""" + mock_env["profile"]._is_repo_private.return_value = True + + with ( + patch("swesmith.harness.utils.docker") as mock_docker, + patch("swesmith.harness.utils.registry") as mock_registry, + patch( + "swesmith.harness.utils.setup_logger", return_value=mock_env["logger"] + ), + patch("swesmith.harness.utils._find_ssh_key", return_value=None), + patch("swesmith.harness.utils.cleanup_container"), + ): + mock_docker.from_env.return_value = mock_env["client"] + mock_registry.get_from_inst.return_value = mock_env["profile"] + + from swesmith.harness.utils import run_patch_in_container + + instance = {"instance_id": "test_instance"} + result = run_patch_in_container( + instance=instance, + run_id="run1", + log_dir=tmp_path, + timeout=60, + ) + assert result is not None + logger, timed_out = result + assert timed_out is False + + def test_public_repo_skips_ssh(self, mock_env, tmp_path): + """When repo is public, no SSH key logic should be triggered.""" + mock_env["profile"]._is_repo_private.return_value = False + + with ( + patch("swesmith.harness.utils.docker") as mock_docker, + patch("swesmith.harness.utils.registry") as mock_registry, + patch( + 
"swesmith.harness.utils.setup_logger", return_value=mock_env["logger"] + ), + patch("swesmith.harness.utils._find_ssh_key") as mock_find_key, + patch("swesmith.harness.utils.copy_to_container") as mock_copy, + patch( + "swesmith.harness.utils.exec_run_with_timeout", + return_value=("output", False, 1.0), + ), + patch("swesmith.harness.utils.cleanup_container"), + ): + mock_docker.from_env.return_value = mock_env["client"] + mock_registry.get_from_inst.return_value = mock_env["profile"] + + from swesmith.harness.utils import run_patch_in_container + + instance = {"instance_id": "test_instance"} + run_patch_in_container( + instance=instance, + run_id="run1", + log_dir=tmp_path, + timeout=60, + ) + + mock_find_key.assert_not_called() + mock_copy.assert_not_called() + + def test_git_fetch_with_ssh_env_when_private(self, mock_env, tmp_path): + """When repo is private and commit is given, git fetch should receive ssh_env.""" + mock_env["profile"]._is_repo_private.return_value = True + + ssh_key = tmp_path / "id_rsa" + ssh_key.write_text("fake key") + + with ( + patch("swesmith.harness.utils.docker") as mock_docker, + patch("swesmith.harness.utils.registry") as mock_registry, + patch( + "swesmith.harness.utils.setup_logger", return_value=mock_env["logger"] + ), + patch("swesmith.harness.utils._find_ssh_key", return_value=ssh_key), + patch("swesmith.harness.utils.copy_to_container"), + patch( + "swesmith.harness.utils.exec_run_with_timeout", + return_value=("output", False, 1.0), + ), + patch("swesmith.harness.utils.cleanup_container"), + ): + mock_docker.from_env.return_value = mock_env["client"] + mock_registry.get_from_inst.return_value = mock_env["profile"] + + from swesmith.harness.utils import run_patch_in_container + + instance = {"instance_id": "test_instance"} + run_patch_in_container( + instance=instance, + run_id="run1", + log_dir=tmp_path, + timeout=60, + commit="abc123", + ) + + fetch_calls = [ + c + for c in mock_env["container"].exec_run.call_args_list + if 
c.args + and c.args[0] == "git fetch" + or (c.kwargs.get("cmd") == "git fetch") + ] + + found_ssh_env = False + for c in mock_env["container"].exec_run.call_args_list: + if len(c.args) > 0 and c.args[0] == "git fetch": + env = c.kwargs.get("environment", {}) + if "GIT_SSH_COMMAND" in env: + found_ssh_env = True + assert found_ssh_env, ( + "git fetch should have been called with ssh_env containing GIT_SSH_COMMAND" + ) + + def test_git_fetch_failure_logged(self, mock_env, tmp_path): + """When git fetch fails, the failure should be logged.""" + mock_env["profile"]._is_repo_private.return_value = False + + def exec_run_side_effect(cmd, **kwargs): + if cmd == "git fetch": + return MagicMock(exit_code=1, output=b"fetch error") + if isinstance(cmd, str) and cmd.startswith("git checkout"): + return MagicMock(exit_code=0, output=b"ok") + return MagicMock(exit_code=0, output=b"ok") + + mock_env["container"].exec_run.side_effect = exec_run_side_effect + + with ( + patch("swesmith.harness.utils.docker") as mock_docker, + patch("swesmith.harness.utils.registry") as mock_registry, + patch( + "swesmith.harness.utils.setup_logger", return_value=mock_env["logger"] + ), + patch( + "swesmith.harness.utils.exec_run_with_timeout", + return_value=("output", False, 1.0), + ), + patch("swesmith.harness.utils.cleanup_container"), + ): + mock_docker.from_env.return_value = mock_env["client"] + mock_registry.get_from_inst.return_value = mock_env["profile"] + + from swesmith.harness.utils import run_patch_in_container + + instance = {"instance_id": "test_instance"} + run_patch_in_container( + instance=instance, + run_id="run1", + log_dir=tmp_path, + timeout=60, + commit="abc123", + ) + + logged_messages = [str(c) for c in mock_env["logger"].info.call_args_list] + assert any("GIT FETCH FAILED" in msg for msg in logged_messages) diff --git a/tests/profiles/test_base.py b/tests/profiles/test_base.py index 9df92295..0cb4f456 100644 --- a/tests/profiles/test_base.py +++ 
b/tests/profiles/test_base.py @@ -1,8 +1,10 @@ import subprocess +import urllib.error import pytest import os import shutil from dataclasses import dataclass +from pathlib import Path from swebench.harness.constants import FAIL_TO_PASS, KEY_INSTANCE_ID from swesmith.bug_gen.mirror.generate import INSTANCE_REF @@ -10,7 +12,7 @@ from swesmith.constants import ORG_NAME_GH from swesmith.profiles import registry, RepoProfile from swesmith.profiles.utils import INSTALL_CMAKE, INSTALL_BAZEL -from unittest.mock import patch +from unittest.mock import MagicMock, patch @pytest.fixture(autouse=True) @@ -50,53 +52,58 @@ def test_image_name(): def test_repo_profile_clone(): """Test the RepoProfile.clone method, adapted from the original clone_repo test.""" repo_profile = registry.get("mewwts__addict.75284f95") + mirror_ssh = f"git@github.com:{repo_profile.mirror_name}.git" - # Test with default dest (should use repo_name) - # Patch GITHUB_TOKEN to None to ensure SSH URL format is used + # Test public repo clone (HTTPS read URL, SSH push URL) expected_dest = repo_profile.repo_name - expected_cmd = f"git clone git@github.com:{repo_profile.mirror_name}.git {repo_profile.repo_name}" - with ( - patch.dict(os.environ, {}, clear=False), - patch("os.getenv", return_value=None), + patch.object(repo_profile, "_is_repo_private", return_value=False), patch("os.path.exists", return_value=False) as mock_exists, patch("subprocess.run") as mock_run, ): result, cloned = repo_profile.clone() mock_exists.assert_called_once_with(expected_dest) - mock_run.assert_called_once_with( - expected_cmd, - check=True, - shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, + assert mock_run.call_count == 2 + clone_call, seturl_call = mock_run.call_args_list + assert ( + clone_call.args[0] + == f"git clone https://github.com/{repo_profile.mirror_name} {expected_dest}" + ) + assert ( + seturl_call.args[0] + == f"git -C {expected_dest} remote set-url --push origin {mirror_ssh}" ) assert result 
== expected_dest - assert cloned == True + assert cloned is True - # Test with custom dest specified - custom_dest = "some_dir" - expected_cmd_with_dest = ( - f"git clone git@github.com:{repo_profile.mirror_name}.git {custom_dest}" - ) + # Test private repo clone (SSH for both read and push) + with ( + patch.object(repo_profile, "_is_repo_private", return_value=True), + patch("os.path.exists", return_value=False), + patch("subprocess.run") as mock_run, + ): + result, cloned = repo_profile.clone() + assert mock_run.call_count == 2 + clone_call, seturl_call = mock_run.call_args_list + assert clone_call.args[0] == f"git clone {mirror_ssh} {expected_dest}" + assert ( + seturl_call.args[0] + == f"git -C {expected_dest} remote set-url --push origin {mirror_ssh}" + ) + assert cloned is True + # Test with custom dest + custom_dest = "some_dir" with ( - patch.dict(os.environ, {}, clear=False), - patch("os.getenv", return_value=None), - patch("os.path.exists", return_value=False) as mock_exists, + patch.object(repo_profile, "_is_repo_private", return_value=False), + patch("os.path.exists", return_value=False), patch("subprocess.run") as mock_run, ): result, cloned = repo_profile.clone(custom_dest) - mock_exists.assert_called_once_with(custom_dest) - mock_run.assert_called_once_with( - expected_cmd_with_dest, - check=True, - shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + clone_call = mock_run.call_args_list[0] + assert custom_dest in clone_call.args[0] assert result == custom_dest - assert cloned == True + assert cloned is True # Test when repo already exists with ( @@ -107,7 +114,7 @@ def test_repo_profile_clone(): mock_exists.assert_called_once_with(custom_dest) mock_run.assert_not_called() assert result == custom_dest - assert cloned == False + assert cloned is False def test_python_log_parser(): @@ -312,7 +319,7 @@ def test_create_mirror(): mock_repos.create_in_org.assert_not_called() mock_run.assert_not_called() - # Test creating new mirror + 
# Test creating new mirror (private source repo) with ( patch.object(repo_profile, "_mirror_exists", return_value=False), patch("os.listdir", return_value=[repo_profile.repo_name]), @@ -320,10 +327,14 @@ def test_create_mirror(): patch.object(repo_profile.api, "repos") as mock_repos, patch("subprocess.run") as mock_run, ): + mock_repos.get.return_value = MagicMock(private=True) repo_profile.create_mirror() - # Should create mirror and run git commands - mock_repos.create_in_org.assert_called_once() + # Should query source repo visibility and create mirror with matching visibility + mock_repos.get.assert_called_once_with(repo_profile.owner, repo_profile.repo) + mock_repos.create_in_org.assert_called_once_with( + repo_profile.org_gh, repo_profile.repo_name, private=True + ) assert mock_run.call_count == 3 # Three git commands @@ -629,6 +640,123 @@ def my_function(x, y): assert any(getattr(e, "is_function", False) for e in entities) +def test_is_repo_private_404_assumes_private(): + """Test _is_repo_private returns True when GitHub API returns 404.""" + repo_profile = registry.get("mewwts__addict.75284f95") + repo_profile._cache_repo_private = None + + error = urllib.error.HTTPError("url", 404, "Not Found", {}, None) + with patch("urllib.request.urlopen", side_effect=error): + assert repo_profile._is_repo_private() is True + + +def test_is_repo_private_non_404_raises(): + """Test _is_repo_private raises on non-404 HTTP errors (e.g. 
rate limit).""" + repo_profile = registry.get("mewwts__addict.75284f95") + repo_profile._cache_repo_private = None + + error = urllib.error.HTTPError("url", 403, "Forbidden", {}, None) + with patch("urllib.request.urlopen", side_effect=error): + with pytest.raises(urllib.error.HTTPError): + repo_profile._is_repo_private() + + +def test_is_repo_private_network_error_raises(): + """Test _is_repo_private raises on network errors.""" + repo_profile = registry.get("mewwts__addict.75284f95") + repo_profile._cache_repo_private = None + + with patch( + "urllib.request.urlopen", side_effect=urllib.error.URLError("Connection error") + ): + with pytest.raises(urllib.error.URLError): + repo_profile._is_repo_private() + + +def test_configure_ssh_env_sets_git_ssh_command(): + """Test _configure_ssh_env sets GIT_SSH_COMMAND when GITHUB_USER_SSH_KEY is set.""" + saved = os.environ.pop("GIT_SSH_COMMAND", None) + try: + with patch.dict(os.environ, {"GITHUB_USER_SSH_KEY": "/path/to/key"}): + os.environ.pop("GIT_SSH_COMMAND", None) + RepoProfile._configure_ssh_env() + assert ( + os.environ["GIT_SSH_COMMAND"] + == "ssh -i /path/to/key -o IdentitiesOnly=yes" + ) + finally: + if saved is not None: + os.environ["GIT_SSH_COMMAND"] = saved + else: + os.environ.pop("GIT_SSH_COMMAND", None) + + +def test_configure_ssh_env_does_not_overwrite(): + """Test _configure_ssh_env does not overwrite existing GIT_SSH_COMMAND.""" + with patch.dict( + os.environ, + {"GITHUB_USER_SSH_KEY": "/path/to/key", "GIT_SSH_COMMAND": "existing"}, + ): + RepoProfile._configure_ssh_env() + assert os.environ["GIT_SSH_COMMAND"] == "existing" + + +def test_prepare_dockerfile(): + """Test _prepare_dockerfile injects BuildKit syntax and SSH mounts.""" + repo_profile = registry.get("mewwts__addict.75284f95") + input_dockerfile = "FROM python:3.10\nRUN pip install -e ." 
+ + result = repo_profile._prepare_dockerfile(input_dockerfile) + + assert result.startswith("# syntax=docker/dockerfile:1") + assert 'ENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=accept-new"' in result + assert "RUN --mount=type=ssh,required=false pip install -e ." in result + + +def test_prepare_dockerfile_idempotent_syntax(): + """Test _prepare_dockerfile does not duplicate the syntax directive.""" + repo_profile = registry.get("mewwts__addict.75284f95") + input_dockerfile = ( + "# syntax=docker/dockerfile:1\nFROM python:3.10\nRUN pip install -e ." + ) + + result = repo_profile._prepare_dockerfile(input_dockerfile) + + assert result.count("# syntax=docker/dockerfile") == 1 + + +def test_docker_ssh_arg_with_key_found(): + """Test _docker_ssh_arg when _find_ssh_key discovers a key.""" + repo_profile = registry.get("mewwts__addict.75284f95") + with patch( + "swesmith.profiles.base._find_ssh_key", + return_value=Path("/home/user/.ssh/id_ed25519"), + ): + assert ( + repo_profile._docker_ssh_arg == "--ssh default=/home/user/.ssh/id_ed25519" + ) + + +def test_docker_ssh_arg_private_repo_no_key(): + """Test _docker_ssh_arg when repo is private but no key found anywhere.""" + repo_profile = registry.get("mewwts__addict.75284f95") + with ( + patch("swesmith.profiles.base._find_ssh_key", return_value=None), + patch.object(repo_profile, "_is_repo_private", return_value=True), + ): + assert repo_profile._docker_ssh_arg == "--ssh default" + + +def test_docker_ssh_arg_public_repo_no_key(): + """Test _docker_ssh_arg when repo is public and no key found.""" + repo_profile = registry.get("mewwts__addict.75284f95") + with ( + patch("swesmith.profiles.base._find_ssh_key", return_value=None), + patch.object(repo_profile, "_is_repo_private", return_value=False), + ): + assert repo_profile._docker_ssh_arg == "" + + def test_is_test_path_cases(tmp_path): """Test the _is_test_path method for various file and directory patterns and extensions.""" # Use MockRepoProfile with a dummy 
directory diff --git a/tests/profiles/test_profiles_javascript.py b/tests/profiles/test_profiles_javascript.py index a529edd9..6eee5faf 100644 --- a/tests/profiles/test_profiles_javascript.py +++ b/tests/profiles/test_profiles_javascript.py @@ -1,4 +1,14 @@ -from swesmith.profiles.javascript import parse_log_karma, parse_log_jasmine +from unittest.mock import patch + +from swesmith.constants import ENV_NAME +from swesmith.profiles.javascript import ( + default_npm_install_dockerfile, + parse_log_karma, + parse_log_jasmine, + GithubReadmeStats3e974011, + Commanderjs395cf714, + Colorfef7b619, +) from swebench.harness.constants import TestStatus @@ -67,3 +77,55 @@ def test_parse_log_jasmine_no_matches(): """ result = parse_log_jasmine(log) assert result == {} + + +# --- Tests for default_npm_install_dockerfile and mirror_url usage --- + + +def test_default_npm_install_dockerfile_default_node(): + result = default_npm_install_dockerfile("https://github.com/org/repo") + assert "FROM node:18-bullseye" in result + assert f"git clone https://github.com/org/repo /{ENV_NAME}" in result + assert "npm install" in result + + +def test_default_npm_install_dockerfile_custom_node(): + result = default_npm_install_dockerfile( + "https://github.com/org/repo", node_version="22" + ) + assert "FROM node:22-bullseye" in result + + +def test_default_npm_install_dockerfile_ssh_url(): + result = default_npm_install_dockerfile("git@github.com:org/repo.git") + assert f"git clone git@github.com:org/repo.git /{ENV_NAME}" in result + + +def test_github_readme_stats_dockerfile_uses_mirror_url(): + profile = GithubReadmeStats3e974011() + with patch.object(type(profile), "_is_repo_private", return_value=False): + dockerfile = profile.dockerfile + assert f"https://github.com/{profile.mirror_name}" in dockerfile + + +def test_github_readme_stats_dockerfile_ssh_when_private(): + profile = GithubReadmeStats3e974011() + with patch.object(type(profile), "_is_repo_private", return_value=True): + 
dockerfile = profile.dockerfile + assert f"git@github.com:{profile.mirror_name}.git" in dockerfile + + +def test_commanderjs_uses_node_20(): + profile = Commanderjs395cf714() + with patch.object(type(profile), "_is_repo_private", return_value=False): + dockerfile = profile.dockerfile + assert "FROM node:20-bullseye" in dockerfile + assert f"https://github.com/{profile.mirror_name}" in dockerfile + + +def test_color_uses_node_22(): + profile = Colorfef7b619() + with patch.object(type(profile), "_is_repo_private", return_value=False): + dockerfile = profile.dockerfile + assert "FROM node:22-bullseye" in dockerfile + assert f"https://github.com/{profile.mirror_name}" in dockerfile diff --git a/tests/profiles/test_profiles_python.py b/tests/profiles/test_profiles_python.py index e3c86317..4f5ce368 100644 --- a/tests/profiles/test_profiles_python.py +++ b/tests/profiles/test_profiles_python.py @@ -26,30 +26,28 @@ def test_python_profile_build_image(): """Test PythonProfile.build_image method""" profile = Addict75284f95() - mock_client = MagicMock() mock_env_yml_content = "name: test_env\ndependencies:\n - python=3.10" + m = mock_open(read_data=mock_env_yml_content) with ( - patch("docker.from_env", return_value=mock_client), - patch("builtins.open", mock_open(read_data=mock_env_yml_content)), - patch("swesmith.profiles.python.build_image_sweb") as mock_build, + patch("builtins.open", m), patch("swesmith.profiles.python.get_dockerfile_env", return_value="FROM test"), + patch("pathlib.Path.mkdir"), + patch("subprocess.run") as mock_run, ): profile.build_image() - # Verify build_image_sweb was called with correct parameters - mock_build.assert_called_once() - call_args = mock_build.call_args - assert call_args[1]["image_name"] == profile.image_name - assert call_args[1]["platform"] == profile.pltf - assert call_args[1]["client"] == mock_client + # Verify docker build was called via subprocess + mock_run.assert_called_once() + build_cmd = mock_run.call_args.args[0] + assert 
"docker build" in build_cmd + assert profile.image_name in build_cmd - # Verify setup script contains expected commands - setup_script = call_args[1]["setup_scripts"]["setup_env.sh"] - assert "git clone" in setup_script - assert "conda env create" in setup_script - assert "conda activate" in setup_script - assert profile.install_cmds[0] in setup_script + # Verify that the Dockerfile was written via open() + m.assert_called() + written_content = "" + for c in m().write.call_args_list: + written_content += c.args[0] def test_python_profile_log_parser(): @@ -223,8 +221,15 @@ def test_python_profile_build_image_error_handling(): """Test PythonProfile.build_image error handling""" profile = Addict75284f95() - with patch("docker.from_env", side_effect=Exception("Docker error")): - with pytest.raises(Exception, match="Docker error"): + mock_env_yml_content = "name: test_env\ndependencies:\n - python=3.10" + + with ( + patch("builtins.open", mock_open(read_data=mock_env_yml_content)), + patch("swesmith.profiles.python.get_dockerfile_env", return_value="FROM test"), + patch("pathlib.Path.mkdir"), + patch("subprocess.run", side_effect=Exception("Build failed")), + ): + with pytest.raises(Exception, match="Build failed"): profile.build_image()