Skip to content

Commit 3af4eba

Browse files
committed
Add Repository.hashfile()
This exposes libgit2's `git_repository_hashfile()`. `pygit2.hashfile()` already exists, but this lets you hash files using the repository's filters.
1 parent b810d9a commit 3af4eba

File tree

4 files changed

+127
-0
lines changed

4 files changed

+127
-0
lines changed

pygit2/decl/repository.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ int git_repository_set_head_detached(
8383
git_repository* repo,
8484
const git_oid* commitish);
8585

86+
/* Hash the file at `path` as an object of kind `type`, writing the id to `out`.
 * Unlike a plain file hash, the repository's filters are applied first;
 * `as_path` is the path used to look up those filtering rules. */
int git_repository_hashfile(git_oid *out, git_repository *repo, const char *path, git_object_t type, const char *as_path);
8687
int git_repository_ident(const char **name, const char **email, const git_repository *repo);
8788
int git_repository_set_ident(git_repository *repo, const char *name, const char *email);
8889
int git_repository_index(git_index **out, git_repository *repo);

pygit2/decl/types.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,14 @@ typedef enum {
5757
GIT_SUBMODULE_IGNORE_DIRTY = 3,
5858
GIT_SUBMODULE_IGNORE_ALL = 4,
5959
} git_submodule_ignore_t;
60+
61+
/* Object kinds accepted as the `type` argument of git_repository_hashfile().
 * The enumerator values are intentionally left as `...` in this declaration. */
typedef enum {
62+
GIT_OBJECT_ANY = ...,
63+
GIT_OBJECT_INVALID = ...,
64+
GIT_OBJECT_COMMIT = ...,
65+
GIT_OBJECT_TREE = ...,
66+
GIT_OBJECT_BLOB = ...,
67+
GIT_OBJECT_TAG = ...,
68+
GIT_OBJECT_OFS_DELTA = ...,
69+
GIT_OBJECT_REF_DELTA = ...,
70+
} git_object_t;

pygit2/repository.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
MergeFavor,
5252
MergeFileFlag,
5353
MergeFlag,
54+
ObjectType,
5455
RepositoryOpenFlag,
5556
RepositoryState,
5657
)
@@ -129,6 +130,58 @@ def pack_all_objects(pack_builder):
129130

130131
return builder.written_objects_count
131132

133+
def hashfile(
    self,
    path: str,
    object_type: ObjectType = ObjectType.BLOB,
    as_path: typing.Optional[str] = None,
):
    """Hash a file after running it through the repository's filters.

    `pygit2.hashfile()` is enough when you only need the hash of a file
    on disk with no filters applied. Use this method instead when the
    file belongs to the repository and its filtering rules (e.g. crlf
    filters) must be applied before the SHA is generated.

    Note: when the repository has `core.safecrlf` set to fail and the
    filtering trips that check, an error is raised and no hash is
    calculated.

    Returns: the calculated SHA (Oid)

    Parameters:

    path
        Path to the file on disk whose contents are hashed. Either an
        absolute path, or a relative path, which is treated as a path
        within the working directory.

    object_type
        The object type to hash (e.g. enums.ObjectType.BLOB)

    as_path
        The path used to look up filtering rules. An empty string means
        no filters are applied when calculating the hash. With `None`,
        and `path` being a file within the repository's working
        directory, `path` itself is used.
    """
    raw_path = to_bytes(path)

    # None is handed to libgit2 as NULL rather than as an encoded string.
    raw_as_path = ffi.NULL if as_path is None else to_bytes(as_path)

    out = ffi.new('git_oid *')
    check_error(
        C.git_repository_hashfile(
            out, self._repo, raw_path, int(object_type), raw_as_path
        )
    )

    # Copy the raw id bytes out of the C struct into a Python-level Oid.
    return Oid(raw=bytes(ffi.buffer(out.id)[:]))
184+
132185
def __iter__(self):
133186
return iter(self.odb)
134187

test/test_repository.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,3 +898,65 @@ def test_is_shallow(testrepo):
898898
f.write('abcdef0123456789abcdef0123456789abcdef00\n')
899899

900900
assert testrepo.is_shallow
901+
902+
903+
def test_repository_hashfile(testrepo):
    """hashfile() matches the indexed id of hello.txt and rejects bad input."""
    expected = testrepo.index['hello.txt'].id

    # Path relative to the working directory
    assert testrepo.hashfile('hello.txt') == expected

    # Same file, addressed by absolute path
    absolute = str(Path(testrepo.workdir, 'hello.txt'))
    assert testrepo.hashfile(absolute) == expected

    # A path that does not exist
    with pytest.raises(KeyError):
        testrepo.hashfile('missing-file')

    # An object type that cannot be hashed
    with pytest.raises(pygit2.GitError):
        testrepo.hashfile('hello.txt', ObjectType.OFS_DELTA)
921+
922+
923+
def test_repository_hashfile_filter(testrepo):
    """hashfile() applies CRLF filtering by default and can bypass it."""
    lf_hash = testrepo.index['hello.txt'].id
    workdir = Path(testrepo.workdir)

    lf_bytes = (workdir / 'hello.txt').read_bytes()

    crlf_bytes = lf_bytes.replace(b'\n', b'\r\n')
    crlf_hash = utils.gen_blob_sha1(crlf_bytes)
    assert crlf_hash != lf_hash

    # hellocrlf.txt is hello.txt rewritten with CRLF line endings
    crlf_file = workdir / 'hellocrlf.txt'
    crlf_file.write_bytes(crlf_bytes)

    # Set up a CRLF filter
    testrepo.config['core.autocrlf'] = True
    (workdir / '.gitattributes').write_text('*.txt text\n*.bin binary\n\n')

    # With core.autocrlf=True the CRLF copy hashes like the original
    assert testrepo.hashfile('hellocrlf.txt') == lf_hash

    # Test absolute path with filters
    assert testrepo.hashfile(str(crlf_file)) == lf_hash

    # Bypass filters
    assert testrepo.hashfile('hellocrlf.txt', as_path='') == crlf_hash

    # Bypass filters via .gitattributes
    assert testrepo.hashfile('hellocrlf.txt', as_path='foobar.bin') == crlf_hash

    # If core.safecrlf=fail, hashing a non-CRLF file will fail
    testrepo.config['core.safecrlf'] = 'fail'
    with pytest.raises(pygit2.GitError):
        testrepo.hashfile('hello.txt')

0 commit comments

Comments
 (0)