Skip to content

Commit cb52b49

Browse files
committed
Merge branch 'ds/find-unique-abbrev-optim'
Optimize the code to find the shortest unique prefix of object names. * ds/find-unique-abbrev-optim: sha1_name: minimize OID comparisons during disambiguation; sha1_name: parse less while finding common prefix; sha1_name: unroll len loop in find_unique_abbrev_r(); p4211-line-log.sh: add log --oneline --raw --parents perf test
2 parents fb4cd88 + 0e87b85 commit cb52b49

File tree

2 files changed

+123
-16
lines changed

2 files changed

+123
-16
lines changed

sha1_name.c

Lines changed: 119 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ static void unique_in_pack(struct packed_git *p,
153153
uint32_t num, last, i, first = 0;
154154
const struct object_id *current = NULL;
155155

156-
open_pack_index(p);
156+
if (open_pack_index(p) || !p->num_objects)
157+
return;
158+
157159
num = p->num_objects;
158160
last = num;
159161
while (first < last) {
@@ -474,10 +476,104 @@ static unsigned msb(unsigned long val)
474476
return r;
475477
}
476478

477-
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
479+
/*
 * Working state for computing the shortest unique abbreviation of one
 * object name. It is passed as the opaque cb_data to extend_abbrev_len().
 */
struct min_abbrev_data {
480+
unsigned int init_len; /* hex index at which extend_abbrev_len() starts comparing */
481+
unsigned int cur_len; /* abbreviation length found so far; only ever grown by extend_abbrev_len() */
482+
char *hex; /* NUL-terminated hex form of the object name being abbreviated */
483+
const unsigned char *hash; /* raw hash of the same object; used for the pack binary search */
484+
};
485+
486+
static inline char get_hex_char_from_oid(const struct object_id *oid,
487+
unsigned int pos)
478488
{
479-
int status, exists;
489+
static const char hex[] = "0123456789abcdef";
480490

491+
if ((pos & 1) == 0)
492+
return hex[oid->hash[pos >> 1] >> 4];
493+
else
494+
return hex[oid->hash[pos >> 1] & 0xf];
495+
}
496+
497+
static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
498+
{
499+
struct min_abbrev_data *mad = cb_data;
500+
501+
unsigned int i = mad->init_len;
502+
while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
503+
i++;
504+
505+
if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
506+
mad->cur_len = i + 1;
507+
508+
return 0;
509+
}
510+
511+
static void find_abbrev_len_for_pack(struct packed_git *p,
512+
struct min_abbrev_data *mad)
513+
{
514+
int match = 0;
515+
uint32_t num, last, first = 0;
516+
struct object_id oid;
517+
518+
if (open_pack_index(p) || !p->num_objects)
519+
return;
520+
521+
num = p->num_objects;
522+
last = num;
523+
while (first < last) {
524+
uint32_t mid = first + (last - first) / 2;
525+
const unsigned char *current;
526+
int cmp;
527+
528+
current = nth_packed_object_sha1(p, mid);
529+
cmp = hashcmp(mad->hash, current);
530+
if (!cmp) {
531+
match = 1;
532+
first = mid;
533+
break;
534+
}
535+
if (cmp > 0) {
536+
first = mid + 1;
537+
continue;
538+
}
539+
last = mid;
540+
}
541+
542+
/*
543+
* first is now the position in the packfile where we would insert
544+
* mad->hash if it does not exist (or the position of mad->hash if
545+
* it does exist). Hence, we consider a maximum of three objects
546+
* nearby for the abbreviation length.
547+
*/
548+
mad->init_len = 0;
549+
if (!match) {
550+
nth_packed_object_oid(&oid, p, first);
551+
extend_abbrev_len(&oid, mad);
552+
} else if (first < num - 1) {
553+
nth_packed_object_oid(&oid, p, first + 1);
554+
extend_abbrev_len(&oid, mad);
555+
}
556+
if (first > 0) {
557+
nth_packed_object_oid(&oid, p, first - 1);
558+
extend_abbrev_len(&oid, mad);
559+
}
560+
mad->init_len = mad->cur_len;
561+
}
562+
563+
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
564+
{
565+
struct packed_git *p;
566+
567+
prepare_packed_git();
568+
for (p = packed_git; p; p = p->next)
569+
find_abbrev_len_for_pack(p, mad);
570+
}
571+
572+
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
573+
{
574+
struct disambiguate_state ds;
575+
struct min_abbrev_data mad;
576+
struct object_id oid_ret;
481577
if (len < 0) {
482578
unsigned long count = approximate_object_count();
483579
/*
@@ -503,19 +599,26 @@ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
503599
sha1_to_hex_r(hex, sha1);
504600
if (len == GIT_SHA1_HEXSZ || !len)
505601
return GIT_SHA1_HEXSZ;
506-
exists = has_sha1_file(sha1);
507-
while (len < GIT_SHA1_HEXSZ) {
508-
struct object_id oid_ret;
509-
status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
510-
if (exists
511-
? !status
512-
: status == SHORT_NAME_NOT_FOUND) {
513-
hex[len] = 0;
514-
return len;
515-
}
516-
len++;
517-
}
518-
return len;
602+
603+
mad.init_len = len;
604+
mad.cur_len = len;
605+
mad.hex = hex;
606+
mad.hash = sha1;
607+
608+
find_abbrev_len_packed(&mad);
609+
610+
if (init_object_disambiguation(hex, mad.cur_len, &ds) < 0)
611+
return -1;
612+
613+
ds.fn = extend_abbrev_len;
614+
ds.always_call_fn = 1;
615+
ds.cb_data = (void *)&mad;
616+
617+
find_short_object_filename(&ds);
618+
(void)finish_object_disambiguation(&ds, &oid_ret);
619+
620+
hex[mad.cur_len] = 0;
621+
return mad.cur_len;
519622
}
520623

521624
const char *find_unique_abbrev(const unsigned char *sha1, int len)

t/perf/p4211-line-log.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,8 @@ test_perf 'git log -L (renames on)' '
3131
git log -M -L 1:"$file" >/dev/null
3232
'
3333

34+
test_perf 'git log --oneline --raw --parents' '
35+
git log --oneline --raw --parents >/dev/null
36+
'
37+
3438
test_done

0 commit comments

Comments
 (0)