Skip to content

Commit 2a5fe25

Browse files
committed
Merge branch 'jc/rename'
* 'jc/rename' (early part): Optimize rename detection for a huge diff
2 parents 018465d + 6d24ad9 commit 2a5fe25

File tree

1 file changed

+58
-22
lines changed

1 file changed

+58
-22
lines changed

diffcore-rename.c

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ static int basename_same(struct diff_filespec *src, struct diff_filespec *dst)
112112
struct diff_score {
113113
int src; /* index in rename_src */
114114
int dst; /* index in rename_dst */
115-
int score;
116-
int name_score;
115+
unsigned short score;
116+
short name_score;
117117
};
118118

119119
static int estimate_similarity(struct diff_filespec *src,
@@ -223,6 +223,12 @@ static int score_compare(const void *a_, const void *b_)
223223
{
224224
const struct diff_score *a = a_, *b = b_;
225225

226+
/* sink the unused ones to the bottom */
227+
if (a->dst < 0)
228+
return (0 <= b->dst);
229+
else if (b->dst < 0)
230+
return -1;
231+
226232
if (a->score == b->score)
227233
return b->name_score - a->name_score;
228234

@@ -387,6 +393,22 @@ static int find_exact_renames(void)
387393
return i;
388394
}
389395

396+
#define NUM_CANDIDATE_PER_DST 4
397+
/*
 * Offer candidate 'o' to the NUM_CANDIDATE_PER_DST-entry array 'm'.
 * The entry of 'm' that score_compare() orders last is located, and
 * it is overwritten with a copy of '*o' only when score_compare()
 * orders 'o' ahead of it; otherwise 'm' is left untouched.
 */
static void record_if_better(struct diff_score m[], struct diff_score *o)
398+
{
399+
int i, worst;
400+
401+
/* find the worst one: the entry score_compare() places after all the others */
402+
worst = 0;
403+
for (i = 1; i < NUM_CANDIDATE_PER_DST; i++)
404+
if (score_compare(&m[i], &m[worst]) > 0)
405+
worst = i;
406+
407+
/* is 'o' better than the worst one?  if so, it takes over that slot */
408+
if (score_compare(&m[worst], o) > 0)
409+
m[worst] = *o;
410+
}
411+
390412
void diffcore_rename(struct diff_options *options)
391413
{
392414
int detect_rename = options->detect_rename;
@@ -474,47 +496,61 @@ void diffcore_rename(struct diff_options *options)
474496
goto cleanup;
475497
}
476498

477-
mx = xmalloc(sizeof(*mx) * num_create * num_src);
499+
mx = xcalloc(num_create * NUM_CANDIDATE_PER_DST, sizeof(*mx));
478500
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
479-
int base = dst_cnt * num_src;
480501
struct diff_filespec *two = rename_dst[i].two;
502+
struct diff_score *m;
503+
481504
if (rename_dst[i].pair)
482505
continue; /* dealt with exact match already. */
506+
507+
m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
508+
for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
509+
m[j].dst = -1;
510+
483511
for (j = 0; j < rename_src_nr; j++) {
484512
struct diff_filespec *one = rename_src[j].one;
485-
struct diff_score *m = &mx[base+j];
486-
m->src = j;
487-
m->dst = i;
488-
m->score = estimate_similarity(one, two,
489-
minimum_score);
490-
m->name_score = basename_same(one, two);
513+
struct diff_score this_src;
514+
this_src.score = estimate_similarity(one, two,
515+
minimum_score);
516+
this_src.name_score = basename_same(one, two);
517+
this_src.dst = i;
518+
this_src.src = j;
519+
record_if_better(m, &this_src);
491520
diff_free_filespec_blob(one);
492521
}
493522
/* We do not need the text anymore */
494523
diff_free_filespec_blob(two);
495524
dst_cnt++;
496525
}
526+
497527
/* cost matrix sorted by most to least similar pair */
498-
qsort(mx, num_create * num_src, sizeof(*mx), score_compare);
499-
for (i = 0; i < num_create * num_src; i++) {
500-
struct diff_rename_dst *dst = &rename_dst[mx[i].dst];
501-
struct diff_filespec *src;
528+
qsort(mx, dst_cnt * NUM_CANDIDATE_PER_DST, sizeof(*mx), score_compare);
529+
530+
for (i = 0; i < dst_cnt * NUM_CANDIDATE_PER_DST; i++) {
531+
struct diff_rename_dst *dst;
532+
533+
if ((mx[i].dst < 0) ||
534+
(mx[i].score < minimum_score))
535+
break; /* there is no more usable pair. */
536+
dst = &rename_dst[mx[i].dst];
502537
if (dst->pair)
503538
continue; /* already done, either exact or fuzzy. */
504-
if (mx[i].score < minimum_score)
505-
break; /* there is no more usable pair. */
506-
src = rename_src[mx[i].src].one;
507-
if (src->rename_used)
539+
if (rename_src[mx[i].src].one->rename_used)
508540
continue;
509541
record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
510542
rename_count++;
511543
}
512-
for (i = 0; i < num_create * num_src; i++) {
513-
struct diff_rename_dst *dst = &rename_dst[mx[i].dst];
544+
545+
for (i = 0; i < dst_cnt * NUM_CANDIDATE_PER_DST; i++) {
546+
struct diff_rename_dst *dst;
547+
548+
if ((mx[i].dst < 0) ||
549+
(mx[i].score < minimum_score))
550+
break; /* there is no more usable pair. */
551+
dst = &rename_dst[mx[i].dst];
514552
if (dst->pair)
515553
continue; /* already done, either exact or fuzzy. */
516-
if (mx[i].score < minimum_score)
517-
break; /* there is no more usable pair. */
518554
record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
519555
rename_count++;
520556
}

0 commit comments

Comments
 (0)