Skip to content

Commit d6ec087

Browse files
rscharfegitster
authored andcommitted
commit: convert pop_most_recent_commit() to prio_queue
pop_most_recent_commit() calls commit_list_insert_by_date() for parent commits, which is itself called in a loop. This can lead to quadratic complexity if there are many merges. Replace the commit_list with a prio_queue to ensure logarithmic worst case complexity and convert all three users. Add a performance test that exercises one of them using a pathological history that consists of 50% merges and 50% root commits to demonstrate the speedup: Test v2.50.1 HEAD ---------------------------------------------------------------------- 1501.2: rev-parse ':/65535' 2.48(2.47+0.00) 0.20(0.19+0.00) -91.9% Alas, sane histories don't benefit from the conversion much, and traversing Git's own history takes a 1% performance hit on my machine: $ hyperfine -w3 -L git ./git_2.50.1,./git '{git} rev-parse :/^Initial.revision' Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision Time (mean ± σ): 1.071 s ± 0.004 s [User: 1.052 s, System: 0.017 s] Range (min … max): 1.067 s … 1.078 s 10 runs Benchmark 2: ./git rev-parse :/^Initial.revision Time (mean ± σ): 1.079 s ± 0.003 s [User: 1.060 s, System: 0.017 s] Range (min … max): 1.074 s … 1.083 s 10 runs Summary ./git_2.50.1 rev-parse :/^Initial.revision ran 1.01 ± 0.00 times faster than ./git rev-parse :/^Initial.revision Signed-off-by: René Scharfe <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 16bd9f2 commit d6ec087

File tree

7 files changed

+100
-21
lines changed

7 files changed

+100
-21
lines changed

commit.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "parse.h"
3232
#include "object-file.h"
3333
#include "object-file-convert.h"
34+
#include "prio-queue.h"
3435

3536
static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
3637

@@ -738,17 +739,17 @@ void commit_list_sort_by_date(struct commit_list **list)
738739
commit_list_sort(list, commit_list_compare_by_date);
739740
}
740741

741-
struct commit *pop_most_recent_commit(struct commit_list **list,
742+
struct commit *pop_most_recent_commit(struct prio_queue *queue,
742743
unsigned int mark)
743744
{
744-
struct commit *ret = pop_commit(list);
745+
struct commit *ret = prio_queue_get(queue);
745746
struct commit_list *parents = ret->parents;
746747

747748
while (parents) {
748749
struct commit *commit = parents->item;
749750
if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) {
750751
commit->object.flags |= mark;
751-
commit_list_insert_by_date(commit, list);
752+
prio_queue_put(queue, commit);
752753
}
753754
parents = parents->next;
754755
}

commit.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,10 @@ const char *repo_logmsg_reencode(struct repository *r,
201201

202202
const char *skip_blank_lines(const char *msg);
203203

204-
/** Removes the first commit from a list sorted by date, and adds all
205-
* of its parents.
206-
**/
207-
struct commit *pop_most_recent_commit(struct commit_list **list,
204+
struct prio_queue;
205+
206+
/* Removes the first commit from a prio_queue and adds its parents. */
207+
struct commit *pop_most_recent_commit(struct prio_queue *queue,
208208
unsigned int mark);
209209

210210
struct commit *pop_commit(struct commit_list **stack);

fetch-pack.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "commit-graph.h"
3535
#include "sigchain.h"
3636
#include "mergesort.h"
37+
#include "prio-queue.h"
3738

3839
static int transfer_unpack_limit = -1;
3940
static int fetch_unpack_limit = -1;
@@ -600,15 +601,15 @@ static int find_common(struct fetch_negotiator *negotiator,
600601
return count ? retval : 0;
601602
}
602603

603-
static struct commit_list *complete;
604+
static struct prio_queue complete = { compare_commits_by_commit_date };
604605

605606
static int mark_complete(const struct object_id *oid)
606607
{
607608
struct commit *commit = deref_without_lazy_fetch(oid, 1);
608609

609610
if (commit && !(commit->object.flags & COMPLETE)) {
610611
commit->object.flags |= COMPLETE;
611-
commit_list_insert(commit, &complete);
612+
prio_queue_put(&complete, commit);
612613
}
613614
return 0;
614615
}
@@ -625,9 +626,12 @@ static int mark_complete_oid(const char *refname UNUSED,
625626
static void mark_recent_complete_commits(struct fetch_pack_args *args,
626627
timestamp_t cutoff)
627628
{
628-
while (complete && cutoff <= complete->item->date) {
629+
while (complete.nr) {
630+
struct commit *item = prio_queue_peek(&complete);
631+
if (item->date < cutoff)
632+
break;
629633
print_verbose(args, _("Marking %s as complete"),
630-
oid_to_hex(&complete->item->object.oid));
634+
oid_to_hex(&item->object.oid));
631635
pop_most_recent_commit(&complete, COMPLETE);
632636
}
633637
}
@@ -797,7 +801,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
797801
refs_for_each_rawref(get_main_ref_store(the_repository),
798802
mark_complete_oid, NULL);
799803
for_each_cached_alternate(NULL, mark_alternate_complete);
800-
commit_list_sort_by_date(&complete);
801804
if (cutoff)
802805
mark_recent_complete_commits(args, cutoff);
803806
}

object-name.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "commit-reach.h"
2929
#include "date.h"
3030
#include "object-file-convert.h"
31+
#include "prio-queue.h"
3132

3233
static int get_oid_oneline(struct repository *r, const char *, struct object_id *,
3334
const struct commit_list *);
@@ -1457,7 +1458,7 @@ static int get_oid_oneline(struct repository *r,
14571458
const char *prefix, struct object_id *oid,
14581459
const struct commit_list *list)
14591460
{
1460-
struct commit_list *copy = NULL, **copy_tail = &copy;
1461+
struct prio_queue copy = { compare_commits_by_commit_date };
14611462
const struct commit_list *l;
14621463
int found = 0;
14631464
int negative = 0;
@@ -1479,9 +1480,9 @@ static int get_oid_oneline(struct repository *r,
14791480

14801481
for (l = list; l; l = l->next) {
14811482
l->item->object.flags |= ONELINE_SEEN;
1482-
copy_tail = &commit_list_insert(l->item, copy_tail)->next;
1483+
prio_queue_put(&copy, l->item);
14831484
}
1484-
while (copy) {
1485+
while (copy.nr) {
14851486
const char *p, *buf;
14861487
struct commit *commit;
14871488
int matches;
@@ -1503,7 +1504,7 @@ static int get_oid_oneline(struct repository *r,
15031504
regfree(&regex);
15041505
for (l = list; l; l = l->next)
15051506
clear_commit_marks(l->item, ONELINE_SEEN);
1506-
free_commit_list(copy);
1507+
clear_prio_queue(&copy);
15071508
return found ? 0 : -1;
15081509
}
15091510

@@ -2057,7 +2058,6 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo,
20572058
cb.list = &list;
20582059
refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb);
20592060
refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb);
2060-
commit_list_sort_by_date(&list);
20612061
ret = get_oid_oneline(repo, name + 2, oid, list);
20622062

20632063
free_commit_list(list);

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,7 @@ benchmarks = [
11171117
'perf/p1450-fsck.sh',
11181118
'perf/p1451-fsck-skip-list.sh',
11191119
'perf/p1500-graph-walks.sh',
1120+
'perf/p1501-rev-parse-oneline.sh',
11201121
'perf/p2000-sparse-operations.sh',
11211122
'perf/p3400-rebase.sh',
11221123
'perf/p3404-rebase-interactive.sh',

t/perf/p1501-rev-parse-oneline.sh

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/bin/sh
2+
3+
test_description='Test :/ object name notation'
4+
5+
. ./perf-lib.sh
6+
7+
test_perf_fresh_repo
8+
9+
#
10+
# Creates lots of merges to make history traversal costly. In
11+
# particular it creates 2^($max_level-1)-1 2-way merges on top of
12+
# 2^($max_level-1) root commits. E.g., the commit history looks like
13+
# this for a $max_level of 3:
14+
#
15+
# _1_
16+
# / \
17+
# 2 3
18+
# / \ / \
19+
# 4 5 6 7
20+
#
21+
# The numbers are the fast-import marks, which also are the commit
22+
# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges,
23+
# 4-7 are the root commits.
24+
#
25+
build_history () {
26+
local max_level="$1" &&
27+
local level="${2:-1}" &&
28+
local mark="${3:-1}" &&
29+
if test $level -eq $max_level
30+
then
31+
echo "reset refs/heads/master" &&
32+
echo "from $ZERO_OID" &&
33+
echo "commit refs/heads/master" &&
34+
echo "mark :$mark" &&
35+
echo "committer C <[email protected]> 1234567890 +0000" &&
36+
echo "data <<EOF" &&
37+
echo "$mark" &&
38+
echo "EOF"
39+
else
40+
local level1=$((level+1)) &&
41+
local mark1=$((2*mark)) &&
42+
local mark2=$((2*mark+1)) &&
43+
build_history $max_level $level1 $mark1 &&
44+
build_history $max_level $level1 $mark2 &&
45+
echo "commit refs/heads/master" &&
46+
echo "mark :$mark" &&
47+
echo "committer C <[email protected]> 1234567890 +0000" &&
48+
echo "data <<EOF" &&
49+
echo "$mark" &&
50+
echo "EOF" &&
51+
echo "from :$mark1" &&
52+
echo "merge :$mark2"
53+
fi
54+
}
55+
56+
test_expect_success 'setup' '
57+
build_history 16 | git fast-import &&
58+
git log --format="%H %s" --reverse >commits &&
59+
sed -n -e "s/ .*$//p" -e "q" <commits >expect &&
60+
sed -n -e "s/^.* //p" -e "q" <commits >needle
61+
'
62+
63+
test_perf "rev-parse :/$(cat needle)" '
64+
git rev-parse :/$(cat needle) >actual
65+
'
66+
67+
test_expect_success 'verify result' '
68+
test_cmp expect actual
69+
'
70+
71+
test_done

walker.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "blob.h"
1515
#include "refs.h"
1616
#include "progress.h"
17+
#include "prio-queue.h"
1718

1819
static struct object_id current_commit_oid;
1920

@@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree)
7879
#define SEEN (1U << 1)
7980
#define TO_SCAN (1U << 2)
8081

81-
static struct commit_list *complete = NULL;
82+
static struct prio_queue complete = { compare_commits_by_commit_date };
8283

8384
static int process_commit(struct walker *walker, struct commit *commit)
8485
{
@@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit)
8788
if (repo_parse_commit(the_repository, commit))
8889
return -1;
8990

90-
while (complete && complete->item->date >= commit->date) {
91+
while (complete.nr) {
92+
struct commit *item = prio_queue_peek(&complete);
93+
if (item->date < commit->date)
94+
break;
9195
pop_most_recent_commit(&complete, COMPLETE);
9296
}
9397

@@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED,
233237

234238
if (commit) {
235239
commit->object.flags |= COMPLETE;
236-
commit_list_insert(commit, &complete);
240+
prio_queue_put(&complete, commit);
237241
}
238242
return 0;
239243
}
@@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target,
302306
if (!walker->get_recover) {
303307
refs_for_each_ref(get_main_ref_store(the_repository),
304308
mark_complete, NULL);
305-
commit_list_sort_by_date(&complete);
306309
}
307310

308311
for (i = 0; i < targets; i++) {

0 commit comments

Comments
 (0)