Skip to content

Commit 7c85f8a

Browse files
kblees authored and gitster committed
diffcore-rename.c: simplify finding exact renames
The find_exact_renames function currently only uses the hash table for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. lookup sources matching the current destination
4. iterate sources to find best match

This saves several iterations and file_similarity allocations for the
destinations.

Signed-off-by: Karsten Blees <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 48f6407 commit 7c85f8a

File tree

1 file changed

+20
-55
lines changed

1 file changed

+20
-55
lines changed

diffcore-rename.c

Lines changed: 20 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
243243
}
244244

245245
struct file_similarity {
246-
int src_dst, index;
246+
int index;
247247
struct diff_filespec *filespec;
248248
struct file_similarity *next;
249249
};
@@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
260260
return hash;
261261
}
262262

263-
static int find_identical_files(struct file_similarity *src,
264-
struct file_similarity *dst,
263+
static int find_identical_files(struct hash_table *srcs,
264+
int dst_index,
265265
struct diff_options *options)
266266
{
267267
int renames = 0;
268268

269-
/*
270-
* Walk over all the destinations ...
271-
*/
272-
do {
273-
struct diff_filespec *target = dst->filespec;
269+
struct diff_filespec *target = rename_dst[dst_index].two;
274270
struct file_similarity *p, *best;
275271
int i = 100, best_score = -1;
276272

277273
/*
278-
* .. to find the best source match
274+
* Find the best source match for specified destination.
279275
*/
280276
best = NULL;
281-
for (p = src; p; p = p->next) {
277+
for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
282278
int score;
283279
struct diff_filespec *source = p->filespec;
284280

@@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
307303
break;
308304
}
309305
if (best) {
310-
record_rename_pair(dst->index, best->index, MAX_SCORE);
306+
record_rename_pair(dst_index, best->index, MAX_SCORE);
311307
renames++;
312308
}
313-
} while ((dst = dst->next) != NULL);
314309
return renames;
315310
}
316311

317-
static void free_similarity_list(struct file_similarity *p)
312+
static int free_similarity_list(void *p, void *unused)
318313
{
319314
while (p) {
320315
struct file_similarity *entry = p;
321-
p = p->next;
316+
p = entry->next;
322317
free(entry);
323318
}
319+
return 0;
324320
}
325321

326-
static int find_same_files(void *ptr, void *data)
327-
{
328-
int ret;
329-
struct file_similarity *p = ptr;
330-
struct file_similarity *src = NULL, *dst = NULL;
331-
struct diff_options *options = data;
332-
333-
/* Split the hash list up into sources and destinations */
334-
do {
335-
struct file_similarity *entry = p;
336-
p = p->next;
337-
if (entry->src_dst < 0) {
338-
entry->next = src;
339-
src = entry;
340-
} else {
341-
entry->next = dst;
342-
dst = entry;
343-
}
344-
} while (p);
345-
346-
/*
347-
* If we have both sources *and* destinations, see if
348-
* we can match them up
349-
*/
350-
ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
351-
352-
/* Free the hashes and return the number of renames found */
353-
free_similarity_list(src);
354-
free_similarity_list(dst);
355-
return ret;
356-
}
357-
358-
static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
322+
static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec)
359323
{
360324
void **pos;
361325
unsigned int hash;
362326
struct file_similarity *entry = xmalloc(sizeof(*entry));
363327

364-
entry->src_dst = src_dst;
365328
entry->index = index;
366329
entry->filespec = filespec;
367330
entry->next = NULL;
@@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
385348
*/
386349
static int find_exact_renames(struct diff_options *options)
387350
{
388-
int i;
351+
int i, renames = 0;
389352
struct hash_table file_table;
390353

354+
/* Add all sources to the hash table */
391355
init_hash(&file_table);
392-
preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
356+
preallocate_hash(&file_table, rename_src_nr);
393357
for (i = 0; i < rename_src_nr; i++)
394-
insert_file_table(&file_table, -1, i, rename_src[i].p->one);
358+
insert_file_table(&file_table, i, rename_src[i].p->one);
395359

360+
/* Walk the destinations and find best source match */
396361
for (i = 0; i < rename_dst_nr; i++)
397-
insert_file_table(&file_table, 1, i, rename_dst[i].two);
362+
renames += find_identical_files(&file_table, i, options);
398363

399-
/* Find the renames */
400-
i = for_each_hash(&file_table, find_same_files, options);
364+
/* Free source file_similarity chains */
365+
for_each_hash(&file_table, free_similarity_list, options);
401366

402367
/* .. and free the hash data structure */
403368
free_hash(&file_table);
404369

405-
return i;
370+
return renames;
406371
}
407372

408373
#define NUM_CANDIDATE_PER_DST 4

0 commit comments

Comments
 (0)