Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions pydriller/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import logging
import os
from pathlib import Path
from typing import List, Dict, Optional, Set, Generator
from typing import List, Dict, Optional, Set, Generator, Union

from git import Repo, GitCommandError
from git.objects import Commit as GitCommit
Expand All @@ -36,11 +36,11 @@ class Git:
PyDriller: obtaining the list of commits, checkout, reset, etc.
"""

def __init__(self, path: str, conf=None):
def __init__(self, path: Union[str, os.PathLike], conf=None):
"""
Init the Git Repository.

:param str path: path to the repository
:param os.PathLike path: path to the repository
"""
self.path = Path(path).expanduser().resolve()
self.project_name = self.path.name
Expand All @@ -50,7 +50,7 @@ def __init__(self, path: str, conf=None):
# with just "path_to_repo" inside.
if conf is None:
conf = Conf({
"path_to_repo": str(self.path),
"path_to_repo": self.path,
"git": self
})

Expand Down Expand Up @@ -84,7 +84,7 @@ def clear(self):
self.repo.git.clear_cache()

def _open_repository(self):
self._repo = Repo(str(self.path))
self._repo = Repo(self.path)
self._repo.config_writer().set_value("blame", "markUnblamableLines", "true").release()
if self._conf.get("main_branch") is None:
self._discover_main_branch(self._repo)
Expand Down Expand Up @@ -165,7 +165,7 @@ def files(self) -> List[str]:
:return: List[str], the list of the files
"""
_all = []
for path, _, files in os.walk(str(self.path)):
for path, _, files in os.walk(self.path):
if '.git' in path:
continue
for name in files:
Expand Down
42 changes: 25 additions & 17 deletions pydriller/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class Repository:
This is the main class of PyDriller, responsible for running the study.
"""

def __init__(self, path_to_repo: Union[str, List[str]],
def __init__(self, path_to_repo: Union[str, os.PathLike, List[str], List[os.PathLike]],
single: Optional[str] = None,
since: Optional[datetime] = None, since_as_filter: Optional[datetime] = None, to: Optional[datetime] = None,
from_commit: Optional[str] = None, to_commit: Optional[str] = None,
Expand All @@ -59,7 +59,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
include_deleted_files: bool = False,
histogram_diff: bool = False,
skip_whitespaces: bool = False,
clone_repo_to: Optional[str] = None,
clone_repo_to: Optional[Union[str, os.PathLike]] = None,
order: Optional[str] = None,
use_mailmap: bool = False):
"""
Expand All @@ -73,8 +73,8 @@ def __init__(self, path_to_repo: Union[str, List[str]],
repo; if you pass an URL, PyDriller will clone the repo in a
temporary folder, run the study, and delete the temporary folder.

:param Union[str,List[str]] path_to_repo: absolute path (or list of
absolute paths) to the repository(ies) to analyze
:param Union[os.PathLike, List[os.PathLike] path_to_repo: PathLike object (or list of
PathLike objects) to the repository(ies) to analyze
:param str single: hash of a single commit to analyze
:param datetime since: starting date
:param datetime since_as_filter: starting date (scans all commits, does not stop at first commit with date < since_as_filter)
Expand All @@ -98,7 +98,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
:param bool only_releases: analyze only tagged commits
:param bool histogram_diff: add the "--histogram" option when asking for the diff
:param bool skip_whitespaces: add the "-w" option when asking for the diff
:param str clone_repo_to: if the repo under analysis is remote, clone the repo to the specified directory
:param Optional[os.PathLike] clone_repo_to: if the repo under analysis is remote, clone the repo to the specified directory
:param str filepath: only commits that modified this file will be analyzed
:param bool include_deleted_files: include commits modifying a deleted file (useful when analyzing a deleted `filepath`)
:param str order: order of commits. It can be one of: 'date-order',
Expand All @@ -115,9 +115,17 @@ def __init__(self, path_to_repo: Union[str, List[str]],
else set(only_commits)
)

try:
if isinstance(path_to_repo, list):
path_to_repos = [os.fspath(path) for path in path_to_repo]
else:
path_to_repos = [os.fspath(path_to_repo)]
Comment on lines +119 to +122
Copy link
Owner

@ishepard ishepard Oct 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we just convert everything to Path here instead of leaving it as strings? We could convert it to a string later, only where we need it, like in _is_remote. What do you think?
I feel like it would be much easier to read and work with just Path, rather than a mix. But let me know, since you worked on it 😄

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think this was the first change that I coded, when I thought I would just keep using strings internally. Then I realised I should use PathLike internally as well. So I can change it so that Repository always converts its init parameters to Path objects, and all internal functions assume they are handling Path objects as well.

except TypeError:
raise AttributeError("Path to repo must be PathLike or list of PathLike")

options = {
"git": None,
"path_to_repo": path_to_repo,
"path_to_repo": path_to_repos,
"from_commit": from_commit,
"to_commit": to_commit,
"from_tag": from_tag,
Expand Down Expand Up @@ -152,36 +160,36 @@ def __init__(self, path_to_repo: Union[str, List[str]],
self._cleanup = False if clone_repo_to is not None else True

@staticmethod
def _is_remote(repo: str) -> bool:
return repo.startswith(("git@", "https://", "http://", "git://"))
def _is_remote(repo: os.PathLike) -> bool:
return os.fspath(repo).startswith(("git@", "https://", "http://", "git://"))

def _clone_remote_repo(self, tmp_folder: str, repo: str) -> str:
repo_folder = os.path.join(tmp_folder, self._get_repo_name_from_url(repo))
def _clone_remote_repo(self, tmp_folder: os.PathLike, repo: os.PathLike) -> os.PathLike:
repo_folder = os.path.join(tmp_folder, self._get_repo_name_from_url(os.fspath(repo)))
if os.path.isdir(repo_folder):
logger.info(f"Reusing folder {repo_folder} for {repo}")
else:
logger.info(f"Cloning {repo} in temporary folder {repo_folder}")
Repo.clone_from(url=repo, to_path=repo_folder)

return repo_folder
return Path(repo_folder)

def _clone_folder(self) -> str:
if self._conf.get('clone_repo_to'):
clone_folder = str(Path(self._conf.get('clone_repo_to')))
def _clone_folder(self) -> os.PathLike:
clone_folder = self._conf.get('clone_repo_to')
if clone_folder is not None:
if not os.path.isdir(clone_folder):
raise Exception("Not a directory: {0}".format(clone_folder))
raise ValueError("clone_repo_to must be an existing directory")
else:
# Save the temporary directory so we can clean it up later
self._tmp_dir = tempfile.TemporaryDirectory()
clone_folder = self._tmp_dir.name
return clone_folder

@contextmanager
def _prep_repo(self, path_repo: str) -> Generator[Git, None, None]:
def _prep_repo(self, path_repo: os.PathLike) -> Generator[Git, None, None]:
local_path_repo = path_repo
if self._is_remote(path_repo):
local_path_repo = self._clone_remote_repo(self._clone_folder(), path_repo)
local_path_repo = str(Path(local_path_repo).expanduser().resolve())
local_path_repo = Path(local_path_repo).expanduser().resolve()

# when multiple repos are given in input, this variable will serve as a reminder
# of which one we are currently analyzing
Expand Down
18 changes: 3 additions & 15 deletions pydriller/utils/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ def __init__(self, options: Dict[str, Any]) -> None:
for key, val in options.items():
self._options[key] = val

self._sanity_check_repos(self.get('path_to_repo'))
if isinstance(self.get('path_to_repo'), str):
self.set_value('path_to_repos', [self.get('path_to_repo')])
else:
if isinstance(self.get('path_to_repo'), list):
self.set_value('path_to_repos', self.get('path_to_repo'))
else:
self.set_value('path_to_repos', [self.get('path_to_repo')])

if self._options.get("use_mailmap"):
self.set_value("developer_factory", MailmapDeveloperFactory(self))
Expand All @@ -58,17 +57,6 @@ def get(self, key: str) -> Any:
"""
return self._options.get(key, None)

@staticmethod
def _sanity_check_repos(path_to_repo: Union[str, List[str]]) -> None:
"""
Checks if repo is of type str or list.

@param path_to_repo: path to the repo as provided by the user.
@return:
"""
if not isinstance(path_to_repo, str) and not isinstance(path_to_repo, list):
raise Exception("The path to the repo has to be of type 'string' or 'list of strings'!")

def _check_only_one_from_commit(self) -> None:
if not self.only_one_filter([self.get('since'),
self.get('since_as_filter'),
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_version():
url='https://github.com/ishepard/pydriller',
license='Apache License',
package_dir={'pydriller': 'pydriller'},
python_requires='>=3.5',
python_requires='>=3.6',
install_requires=requirements,
tests_require=requirements + test_requirements,
classifiers=[
Expand Down
Loading