Skip to content

Commit 5a6dce7

Browse files
bk2204 authored and gitster committed
hash: set, copy, and use algo field in struct object_id
Now that struct object_id has an algorithm field, we should populate it. This will allow us to handle object IDs in any supported algorithm and distinguish between them. Ensure that the field is written whenever we write an object ID by storing it explicitly every time we write an object. Set values for the empty blob and tree values as well.

In addition, use the algorithm field to compare object IDs. Note that because we zero-initialize struct object_id in many places throughout the codebase, we default to the default algorithm in cases where the algorithm field is zero rather than explicitly initializing all of those locations.

This leads to a branch on every comparison, but the alternative is to compare the entire buffer each time and pad the buffer for SHA-1. That alternative ranges up to 3.9% worse than this approach on the perf tests t0001, t1450, and t1451.

Signed-off-by: brian m. carlson <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0e5e228 commit 5a6dce7

File tree

4 files changed

+54
-13
lines changed

4 files changed

+54
-13
lines changed

hash.h

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -192,36 +192,56 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
192192

193193
extern const struct object_id null_oid;
194194

195-
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
195+
static inline int hashcmp_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
196196
{
197197
/*
198198
* Teach the compiler that there are only two possibilities of hash size
199199
* here, so that it can optimize for this case as much as possible.
200200
*/
201-
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
201+
if (algop->rawsz == GIT_MAX_RAWSZ)
202202
return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
203203
return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
204204
}
205205

206+
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
207+
{
208+
return hashcmp_algop(sha1, sha2, the_hash_algo);
209+
}
210+
206211
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
207212
{
208-
return hashcmp(oid1->hash, oid2->hash);
213+
const struct git_hash_algo *algop;
214+
if (!oid1->algo)
215+
algop = the_hash_algo;
216+
else
217+
algop = &hash_algos[oid1->algo];
218+
return hashcmp_algop(oid1->hash, oid2->hash, algop);
209219
}
210220

211-
static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
221+
static inline int hasheq_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
212222
{
213223
/*
214224
* We write this here instead of deferring to hashcmp so that the
215225
* compiler can properly inline it and avoid calling memcmp.
216226
*/
217-
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
227+
if (algop->rawsz == GIT_MAX_RAWSZ)
218228
return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
219229
return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
220230
}
221231

232+
static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
233+
{
234+
return hasheq_algop(sha1, sha2, the_hash_algo);
235+
}
236+
222237
static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
223238
{
224-
return hasheq(oid1->hash, oid2->hash);
239+
const struct git_hash_algo *algop;
240+
if (!oid1->algo)
241+
algop = the_hash_algo;
242+
else
243+
algop = &hash_algos[oid1->algo];
244+
return hasheq_algop(oid1->hash, oid2->hash, algop);
225245
}
226246

227247
static inline int is_null_oid(const struct object_id *oid)
@@ -237,6 +257,7 @@ static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
237257
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
238258
{
239259
memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
260+
dst->algo = src->algo;
240261
}
241262

242263
static inline struct object_id *oiddup(const struct object_id *src)
@@ -254,11 +275,13 @@ static inline void hashclr(unsigned char *hash)
254275
static inline void oidclr(struct object_id *oid)
255276
{
256277
memset(oid->hash, 0, GIT_MAX_RAWSZ);
278+
oid->algo = hash_algo_by_ptr(the_hash_algo);
257279
}
258280

259281
static inline void oidread(struct object_id *oid, const unsigned char *hash)
260282
{
261283
memcpy(oid->hash, hash, the_hash_algo->rawsz);
284+
oid->algo = hash_algo_by_ptr(the_hash_algo);
262285
}
263286

264287
static inline int is_empty_blob_sha1(const unsigned char *sha1)
@@ -281,6 +304,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid)
281304
return oideq(oid, the_hash_algo->empty_tree);
282305
}
283306

307+
static inline void oid_set_algo(struct object_id *oid, const struct git_hash_algo *algop)
308+
{
309+
oid->algo = hash_algo_by_ptr(algop);
310+
}
311+
284312
const char *empty_tree_oid_hex(void);
285313
const char *empty_blob_oid_hex(void);
286314

hex.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
6969
int get_oid_hex_algop(const char *hex, struct object_id *oid,
7070
const struct git_hash_algo *algop)
7171
{
72-
return get_hash_hex_algop(hex, oid->hash, algop);
72+
int ret = get_hash_hex_algop(hex, oid->hash, algop);
73+
if (!ret)
74+
oid_set_algo(oid, algop);
75+
return ret;
7376
}
7477

7578
/*
@@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid)
8083
{
8184
int i;
8285
for (i = GIT_HASH_NALGOS - 1; i > 0; i--) {
83-
if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i]))
86+
if (!get_oid_hex_algop(hex, oid, &hash_algos[i]))
8487
return i;
8588
}
8689
return GIT_HASH_UNKNOWN;
@@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid,
9598
const char **end,
9699
const struct git_hash_algo *algop)
97100
{
98-
int ret = get_hash_hex_algop(hex, oid->hash, algop);
101+
int ret = get_oid_hex_algop(hex, oid, algop);
99102
if (!ret)
100103
*end = hex + algop->hexsz;
101104
return ret;

notes.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
455455
CALLOC_ARRAY(l, 1);
456456
oidcpy(&l->key_oid, &object_oid);
457457
oidcpy(&l->val_oid, &entry.oid);
458+
oid_set_algo(&l->key_oid, the_hash_algo);
459+
oid_set_algo(&l->val_oid, the_hash_algo);
458460
if (note_tree_insert(t, node, n, l, type,
459461
combine_notes_concatenate))
460462
die("Failed to load %s %s into notes tree "
@@ -484,6 +486,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
484486
strbuf_addch(&non_note_path, '/');
485487
}
486488
strbuf_addstr(&non_note_path, entry.path);
489+
oid_set_algo(&entry.oid, the_hash_algo);
487490
add_non_note(t, strbuf_detach(&non_note_path, NULL),
488491
entry.mode, entry.oid.hash);
489492
}

object-file.c

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -57,16 +57,20 @@
5757

5858
const struct object_id null_oid;
5959
static const struct object_id empty_tree_oid = {
60-
EMPTY_TREE_SHA1_BIN_LITERAL
60+
.hash = EMPTY_TREE_SHA1_BIN_LITERAL,
61+
.algo = GIT_HASH_SHA1,
6162
};
6263
static const struct object_id empty_blob_oid = {
63-
EMPTY_BLOB_SHA1_BIN_LITERAL
64+
.hash = EMPTY_BLOB_SHA1_BIN_LITERAL,
65+
.algo = GIT_HASH_SHA1,
6466
};
6567
static const struct object_id empty_tree_oid_sha256 = {
66-
EMPTY_TREE_SHA256_BIN_LITERAL
68+
.hash = EMPTY_TREE_SHA256_BIN_LITERAL,
69+
.algo = GIT_HASH_SHA256,
6770
};
6871
static const struct object_id empty_blob_oid_sha256 = {
69-
EMPTY_BLOB_SHA256_BIN_LITERAL
72+
.hash = EMPTY_BLOB_SHA256_BIN_LITERAL,
73+
.algo = GIT_HASH_SHA256,
7074
};
7175

7276
static void git_hash_sha1_init(git_hash_ctx *ctx)
@@ -93,6 +97,7 @@ static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx)
9397
{
9498
git_SHA1_Final(oid->hash, &ctx->sha1);
9599
memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ);
100+
oid->algo = GIT_HASH_SHA1;
96101
}
97102

98103

@@ -124,6 +129,7 @@ static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx)
124129
* but keep it in case we extend the hash size again.
125130
*/
126131
memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ);
132+
oid->algo = GIT_HASH_SHA256;
127133
}
128134

129135
static void git_hash_unknown_init(git_hash_ctx *ctx)
@@ -2340,6 +2346,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
23402346
if (namelen == the_hash_algo->hexsz - 2 &&
23412347
!hex_to_bytes(oid.hash + 1, de->d_name,
23422348
the_hash_algo->rawsz - 1)) {
2349+
oid_set_algo(&oid, the_hash_algo);
23432350
if (obj_cb) {
23442351
r = obj_cb(&oid, path->buf, data);
23452352
if (r)

0 commit comments

Comments
 (0)