Skip to content

Commit abfef3b

Browse files
stoklund authored and Eric Wong committed
git-svn: only look at the new parts of svn:mergeinfo
In a Subversion repository where many feature branches are merged into a trunk, the svn:mergeinfo property can grow very large. This severely slows down git-svn's make_log_entry() because it is checking all mergeinfo entries every time the property changes.

In most cases, the additions to svn:mergeinfo since the last commit are pretty small, and there is nothing to gain by checking merges that were already checked for the last commit in the branch.

Add a mergeinfo_changes() function which computes the set of interesting changes to svn:mergeinfo since the last commit. Filter out merged branches whose ranges haven't changed, and remove a common prefix of ranges from other merged branches.

This speeds up "git svn fetch" by several orders of magnitude on a large repository where thousands of feature branches have been merged.

Signed-off-by: Jakob Stoklund Olesen <stoklund@2pi.dk>
Signed-off-by: Eric Wong <normalperson@yhbt.net>
1 parent fbecd99 commit abfef3b

File tree

1 file changed

+72
-12
lines changed

1 file changed

+72
-12
lines changed

perl/Git/SVN.pm

Lines changed: 72 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1178,7 +1178,7 @@ sub find_parent_branch {
11781178
or die "SVN connection failed somewhere...\n";
11791179
}
11801180
print STDERR "Successfully followed parent\n" unless $::_q > 1;
1181-
return $self->make_log_entry($rev, [$parent], $ed);
1181+
return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
11821182
}
11831183
return undef;
11841184
}
@@ -1210,7 +1210,7 @@ sub do_fetch {
12101210
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
12111211
die "SVN connection failed somewhere...\n";
12121212
}
1213-
$self->make_log_entry($rev, \@parents, $ed);
1213+
$self->make_log_entry($rev, \@parents, $ed, $last_rev);
12141214
}
12151215

12161216
sub mkemptydirs {
@@ -1478,9 +1478,9 @@ sub find_extra_svk_parents {
14781478
sub lookup_svn_merge {
14791479
my $uuid = shift;
14801480
my $url = shift;
1481-
my $merge = shift;
1481+
my $source = shift;
1482+
my $revs = shift;
14821483

1483-
my ($source, $revs) = split ":", $merge;
14841484
my $path = $source;
14851485
$path =~ s{^/}{};
14861486
my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@@ -1702,6 +1702,62 @@ sub parents_exclude {
17021702
return @excluded;
17031703
}
17041704

1705+
# Compute what's new in svn:mergeinfo.
#
# Arguments: ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop)
#   $old_path, $old_rev - path and revision whose svn:mergeinfo is the baseline
#   $path, $rev         - path and revision that $mergeinfo_prop belongs to
#   $mergeinfo_prop     - svn:mergeinfo text, one "source:ranges" entry per line
#
# Returns a hash ref mapping each merge source in $mergeinfo_prop to the part
# of its range list that differs from the baseline.  Sources whose range list
# is identical to the baseline are left out.  When the baseline is not cached
# and $old_path is not a directory at r$old_rev, a warning is emitted and an
# empty hash ref is returned.
#
# Side effects: stores parsed mergeinfo per path in $self->{cached_mergeinfo}
# and the matching revision in $self->{cached_mergeinfo_rev}, and prints a
# one-line summary to STDERR.
1706+
sub mergeinfo_changes {
1707+
my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
1708+
# Parse the property into a source => ranges hash.
# NOTE(review): split ":" has no limit, so a source path containing ":"
# would misalign the key/value pairs -- confirm such paths cannot occur.
my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
1709+
my $old_minfo = {};
1710+
1711+
# Initialize cache on the first call.
1712+
unless (defined $self->{cached_mergeinfo_rev}) {
1713+
$self->{cached_mergeinfo_rev} = {};
1714+
$self->{cached_mergeinfo} = {};
1715+
}
1716+
1717+
my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path};
1718+
# Reuse the cached parse only when it was recorded for exactly $old_rev.
# NOTE(review): $old_rev is compared numerically here; confirm that every
# caller passes a defined revision.
if (defined $cached_rev && $cached_rev == $old_rev) {
1719+
$old_minfo = $self->{cached_mergeinfo}{$old_path};
1720+
} else {
1721+
my $ra = $self->ra;
1722+
# Give up if $old_path isn't in the repo.
1723+
# This is probably a merge on a subtree.
1724+
if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
1725+
warn "W: ignoring svn:mergeinfo on $old_path, ",
1726+
"directory didn't exist in r$old_rev\n";
1727+
# Early return: the cache entry for $path is not updated on this path.
return {};
1728+
}
1729+
# Cache miss: read the baseline directory's properties at $old_rev.
my (undef, undef, $props) =
1730+
$self->ra->get_dir($old_path, $old_rev);
1731+
# If the baseline has no svn:mergeinfo, $old_minfo stays an empty hash.
if (defined $props->{"svn:mergeinfo"}) {
1732+
my %omi = map {split ":", $_ } split "\n",
1733+
$props->{"svn:mergeinfo"};
1734+
$old_minfo = \%omi;
1735+
}
1736+
$self->{cached_mergeinfo}{$old_path} = $old_minfo;
1737+
$self->{cached_mergeinfo_rev}{$old_path} = $old_rev;
1738+
}
1739+
1740+
# Cache the new mergeinfo.
1741+
$self->{cached_mergeinfo}{$path} = \%minfo;
1742+
$self->{cached_mergeinfo_rev}{$path} = $rev;
1743+
1744+
my %changes = ();
1745+
foreach my $p (keys %minfo) {
1746+
# A source absent from the baseline is treated as having an empty range list.
# Note: these lexicals shadow the $a/$b package variables used by sort.
my $a = $old_minfo->{$p} || "";
1747+
my $b = $minfo{$p};
1748+
# Omit merged branches whose range lists are unchanged.
1749+
next if $a eq $b;
1750+
# Remove any common range list prefix.
1751+
# String XOR yields "\0" wherever $a and $b agree, so the end of this
# match, $+[0], is the length of their common leading substring.
($a ^ $b) =~ /^[\0]*/;
1752+
# Back up to the last "," at or before the final common character so the
# kept suffix starts on a whole range.  rindex returns -1 when there is
# no such comma, which makes the substr below keep all of $b.
my $common_prefix = rindex $b, ",", $+[0] - 1;
1753+
$changes{$p} = substr $b, $common_prefix + 1;
1754+
}
1755+
# Progress summary; written to STDERR unconditionally.
print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
1756+
scalar(keys %minfo), " sources, ",
1757+
scalar(keys %changes), " changed\n";
1758+
1759+
return \%changes;
1760+
}
17051761

17061762
# note: this function should only be called if the various dirprops
17071763
# have actually changed
@@ -1715,14 +1771,15 @@ sub find_extra_svn_parents {
17151771
# history. Then, we figure out which git revisions are in
17161772
# that tip, but not this revision. If all of those revisions
17171773
# are now marked as merge, we can add the tip as a parent.
1718-
my @merges = split "\n", $mergeinfo;
1774+
my @merges = sort keys %$mergeinfo;
17191775
my @merge_tips;
17201776
my $url = $self->url;
17211777
my $uuid = $self->ra_uuid;
17221778
my @all_ranges;
17231779
for my $merge ( @merges ) {
17241780
my ($tip_commit, @ranges) =
1725-
lookup_svn_merge( $uuid, $url, $merge );
1781+
lookup_svn_merge( $uuid, $url,
1782+
$merge, $mergeinfo->{$merge} );
17261783
unless (!$tip_commit or
17271784
grep { $_ eq $tip_commit } @$parents ) {
17281785
push @merge_tips, $tip_commit;
@@ -1738,8 +1795,9 @@ sub find_extra_svn_parents {
17381795
# check merge tips for new parents
17391796
my @new_parents;
17401797
for my $merge_tip ( @merge_tips ) {
1741-
my $spec = shift @merges;
1798+
my $merge = shift @merges;
17421799
next unless $merge_tip and $excluded{$merge_tip};
1800+
my $spec = "$merge:$mergeinfo->{$merge}";
17431801

17441802
# check out 'new' tips
17451803
my $merge_base;
@@ -1770,7 +1828,7 @@ sub find_extra_svn_parents {
17701828
.@incomplete." commit(s) (eg $incomplete[0])\n";
17711829
} else {
17721830
warn
1773-
"Found merge parent (svn:mergeinfo prop): ",
1831+
"Found merge parent ($spec): ",
17741832
$merge_tip, "\n";
17751833
push @new_parents, $merge_tip;
17761834
}
@@ -1797,7 +1855,7 @@ sub find_extra_svn_parents {
17971855
}
17981856

17991857
sub make_log_entry {
1800-
my ($self, $rev, $parents, $ed) = @_;
1858+
my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
18011859
my $untracked = $self->get_untracked($ed);
18021860

18031861
my @parents = @$parents;
@@ -1809,10 +1867,12 @@ sub make_log_entry {
18091867
($ed, $props->{"svk:merge"}, \@parents);
18101868
}
18111869
if ( $props->{"svn:mergeinfo"} ) {
1870+
my $mi_changes = $self->mergeinfo_changes
1871+
($parent_path || $path, $parent_rev,
1872+
$path, $rev,
1873+
$props->{"svn:mergeinfo"});
18121874
$self->find_extra_svn_parents
1813-
($ed,
1814-
$props->{"svn:mergeinfo"},
1815-
\@parents);
1875+
($ed, $mi_changes, \@parents);
18161876
}
18171877
}
18181878

0 commit comments

Comments
 (0)