Skip to content

Commit 8c912ee

Browse files
rctay authored and gitster committed
teach --histogram to diff
Port JGit's HistogramDiff algorithm over to C. Rough numbers (TODO) show that it is faster than its --patience cousin, as well as the default Myers algorithm.

The implementation has been reworked to use structs and pointers instead of bitmasks, thus doing away with JGit's 2^28 line limit.

We also use xdiff's default hash table implementation (xdl_hash_bits() with XDL_HASHLONG()) for convenience.

Signed-off-by: Tay Ray Chuan <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 46c8f29 commit 8c912ee

File tree

8 files changed

+407
-1
lines changed

8 files changed

+407
-1
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1838,7 +1838,7 @@ ifndef NO_CURL
18381838
GIT_OBJS += http.o http-walker.o remote-curl.o
18391839
endif
18401840
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
1841-
xdiff/xmerge.o xdiff/xpatience.o
1841+
xdiff/xmerge.o xdiff/xpatience.o xdiff/xhistogram.o
18421842
VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
18431843
vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
18441844
VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \

diff.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3369,6 +3369,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
33693369
DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
33703370
else if (!strcmp(arg, "--patience"))
33713371
DIFF_XDL_SET(options, PATIENCE_DIFF);
3372+
else if (!strcmp(arg, "--histogram"))
3373+
DIFF_XDL_SET(options, HISTOGRAM_DIFF);
33723374

33733375
/* flags options */
33743376
else if (!strcmp(arg, "--binary")) {

merge-recursive.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1759,6 +1759,8 @@ int parse_merge_opt(struct merge_options *o, const char *s)
17591759
o->subtree_shift = s + strlen("subtree=");
17601760
else if (!strcmp(s, "patience"))
17611761
o->xdl_opts |= XDF_PATIENCE_DIFF;
1762+
else if (!strcmp(s, "histogram"))
1763+
o->xdl_opts |= XDF_HISTOGRAM_DIFF;
17621764
else if (!strcmp(s, "ignore-space-change"))
17631765
o->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
17641766
else if (!strcmp(s, "ignore-all-space"))

t/t4049-diff-histogram.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/sh
2+
3+
test_description='histogram diff algorithm'
4+
5+
. ./test-lib.sh
6+
. "$TEST_DIRECTORY"/lib-diff-alternative.sh
7+
8+
test_diff_frobnitz "histogram"
9+
10+
test_diff_unique "histogram"
11+
12+
test_done

xdiff/xdiff.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extern "C" {
3333
#define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3)
3434
#define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4)
3535
#define XDF_PATIENCE_DIFF (1 << 5)
36+
#define XDF_HISTOGRAM_DIFF (1 << 6)
3637
#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
3738

3839
#define XDL_PATCH_NORMAL '-'

xdiff/xdiffi.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
331331
if (xpp->flags & XDF_PATIENCE_DIFF)
332332
return xdl_do_patience_diff(mf1, mf2, xpp, xe);
333333

334+
if (xpp->flags & XDF_HISTOGRAM_DIFF)
335+
return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
336+
334337
if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
335338

336339
return -1;

xdiff/xdiffi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
5757
xdemitconf_t const *xecfg);
5858
int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
5959
xdfenv_t *env);
60+
int xdl_do_histogram_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
61+
xdfenv_t *env);
6062

6163
#endif /* #if !defined(XDIFFI_H) */

0 commit comments

Comments
 (0)