Skip to content

Commit 658b57a

Browse files
Matthew Ogilviegitster
authored andcommitted
cvsserver: add misc commit lookup, file meta data, and file listing functions
These will be used soon, but not yet. PERFORMANCE NOTE: getMetaFromCommithash() does not scale well as currently implemented. See comment for possible optimization strategies. Fortunately, it will only be used in cases that would not have worked at all before this change. Signed-off-by: Matthew Ogilvie <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 51a7e6d commit 658b57a

File tree

1 file changed

+365
-1
lines changed

1 file changed

+365
-1
lines changed

git-cvsserver.perl

Lines changed: 365 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2986,6 +2986,9 @@ sub new
29862986

29872987
die "Git repo '$self->{git_path}' doesn't exist" unless ( -d $self->{git_path} );
29882988

2989+
# Stores full sha1's for various branch/tag names, abbreviations, etc:
2990+
$self->{commitRefCache} = {};
2991+
29892992
$self->{dbdriver} = $cfg->{gitcvs}{$state->{method}}{dbdriver} ||
29902993
$cfg->{gitcvs}{dbdriver} || "SQLite";
29912994
$self->{dbname} = $cfg->{gitcvs}{$state->{method}}{dbname} ||
@@ -3446,7 +3449,7 @@ sub update
34463449
);
34473450
}
34483451
# invalidate the gethead cache
3449-
$self->{gethead_cache} = undef;
3452+
$self->clearCommitRefCaches();
34503453

34513454

34523455
# Ending exclusive lock here
@@ -3648,6 +3651,169 @@ sub gethead
36483651
return $tree;
36493652
}
36503653

3654+
=head2 getAnyHead
3655+
3656+
Returns a reference to an array of getmeta structures, one
3657+
per file in the specified tree hash.
3658+
3659+
=cut
3660+
3661+
sub getAnyHead
3662+
{
3663+
my ($self,$hash) = @_;
3664+
3665+
if(!defined($hash))
3666+
{
3667+
return $self->gethead();
3668+
}
3669+
3670+
my @files;
3671+
{
3672+
open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
3673+
or die("Cannot call git-ls-tree : $!");
3674+
local $/ = "\0";
3675+
@files=<$filePipe>;
3676+
close $filePipe;
3677+
}
3678+
3679+
my $tree=[];
3680+
my($line);
3681+
foreach $line (@files)
3682+
{
3683+
$line=~s/\0$//;
3684+
unless ( $line=~/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
3685+
{
3686+
die("Couldn't process git-ls-tree line : $_");
3687+
}
3688+
3689+
my($mode, $git_type, $git_hash, $git_filename) = ($1, $2, $3, $4);
3690+
push @$tree, $self->getMetaFromCommithash($git_filename,$hash);
3691+
}
3692+
3693+
return $tree;
3694+
}
3695+
3696+
=head2 getRevisionDirMap
3697+
3698+
A "revision dir map" contains all the plain-file filenames associated
3699+
with a particular revision (treeish), organized by directory:
3700+
3701+
$type = $out->{$dir}{$fullName}
3702+
3703+
The type of each is "F" (for ordinary file) or "D" (for directory,
3704+
for which the map $out->{$fullName} will also exist).
3705+
3706+
=cut
3707+
3708+
sub getRevisionDirMap
3709+
{
3710+
my ($self,$ver)=@_;
3711+
3712+
if(!defined($self->{revisionDirMapCache}))
3713+
{
3714+
$self->{revisionDirMapCache}={};
3715+
}
3716+
3717+
# Get file list (previously cached results are dependent on HEAD,
3718+
# but are early in each case):
3719+
my $cacheKey;
3720+
my (@fileList);
3721+
if( !defined($ver) || $ver eq "" )
3722+
{
3723+
$cacheKey="";
3724+
if( defined($self->{revisionDirMapCache}{$cacheKey}) )
3725+
{
3726+
return $self->{revisionDirMapCache}{$cacheKey};
3727+
}
3728+
3729+
my @head = @{$self->gethead()};
3730+
foreach my $file ( @head )
3731+
{
3732+
next if ( $file->{filehash} eq "deleted" );
3733+
3734+
push @fileList,$file->{name};
3735+
}
3736+
}
3737+
else
3738+
{
3739+
my ($hash)=$self->lookupCommitRef($ver);
3740+
if( !defined($hash) )
3741+
{
3742+
return undef;
3743+
}
3744+
3745+
$cacheKey=$hash;
3746+
if( defined($self->{revisionDirMapCache}{$cacheKey}) )
3747+
{
3748+
return $self->{revisionDirMapCache}{$cacheKey};
3749+
}
3750+
3751+
open(my $filePipe, '-|', 'git', 'ls-tree', '-z', '-r', $hash)
3752+
or die("Cannot call git-ls-tree : $!");
3753+
local $/ = "\0";
3754+
while ( <$filePipe> )
3755+
{
3756+
chomp;
3757+
unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
3758+
{
3759+
die("Couldn't process git-ls-tree line : $_");
3760+
}
3761+
3762+
my($mode, $git_type, $git_hash, $git_filename) = ($1, $2, $3, $4);
3763+
3764+
push @fileList, $git_filename;
3765+
}
3766+
close $filePipe;
3767+
}
3768+
3769+
# Convert to normalized form:
3770+
my %revMap;
3771+
my $file;
3772+
foreach $file (@fileList)
3773+
{
3774+
my($dir) = ($file=~m%^(?:(.*)/)?([^/]*)$%);
3775+
$dir='' if(!defined($dir));
3776+
3777+
# parent directories:
3778+
# ... create empty dir maps for parent dirs:
3779+
my($td)=$dir;
3780+
while(!defined($revMap{$td}))
3781+
{
3782+
$revMap{$td}={};
3783+
3784+
my($tp)=($td=~m%^(?:(.*)/)?([^/]*)$%);
3785+
$tp='' if(!defined($tp));
3786+
$td=$tp;
3787+
}
3788+
# ... add children to parent maps (now that they exist):
3789+
$td=$dir;
3790+
while($td ne "")
3791+
{
3792+
my($tp)=($td=~m%^(?:(.*)/)?([^/]*)$%);
3793+
$tp='' if(!defined($tp));
3794+
3795+
if(defined($revMap{$tp}{$td}))
3796+
{
3797+
if($revMap{$tp}{$td} ne 'D')
3798+
{
3799+
die "Weird file/directory inconsistency in $cacheKey";
3800+
}
3801+
last; # loop exit
3802+
}
3803+
$revMap{$tp}{$td}='D';
3804+
3805+
$td=$tp;
3806+
}
3807+
3808+
# file
3809+
$revMap{$dir}{$file}='F';
3810+
}
3811+
3812+
# Save in cache:
3813+
$self->{revisionDirMapCache}{$cacheKey}=\%revMap;
3814+
return $self->{revisionDirMapCache}{$cacheKey};
3815+
}
3816+
36513817
=head2 getlog
36523818
36533819
See also gethistorydense().
@@ -3742,6 +3908,204 @@ sub getmeta
37423908
return $meta;
37433909
}
37443910

3911+
sub getMetaFromCommithash
3912+
{
3913+
my $self = shift;
3914+
my $filename = shift;
3915+
my $revCommit = shift;
3916+
3917+
# NOTE: This function doesn't scale well (lots of forks), especially
3918+
# if you have many files that have not been modified for many commits
3919+
# (each git-rev-parse redoes a lot of work for each file
3920+
# that theoretically could be done in parallel by smarter
3921+
# graph traversal).
3922+
#
3923+
# TODO: Possible optimization strategies:
3924+
# - Solve the issue of assigning and remembering "real" CVS
3925+
# revision numbers for branches, and ensure the
3926+
# data structure can do this efficiently. Perhaps something
3927+
# similar to "git notes", and carefully structured to take
3928+
# advantage same-sha1-is-same-contents, to roll the same
3929+
# unmodified subdirectory data onto multiple commits?
3930+
# - Write and use a C tool that is like git-blame, but
3931+
# operates on multiple files with file granularity, instead
3932+
# of one file with line granularity. Cache
3933+
# most-recently-modified in $self->{commitRefCache}{$revCommit}.
3934+
# Try to be intelligent about how many files we do with
3935+
# one fork (perhaps one directory at a time, without recursion,
3936+
# and/or include directory as one line item, recurse from here
3937+
# instead of in C tool?).
3938+
# - Perhaps we could ask the DB for (filename,fileHash),
3939+
# and just guess that it is correct (that the file hadn't
3940+
# changed between $revCommit and the found commit, then
3941+
# changed back, confusing anything trying to interpret
3942+
# history). Probably need to add another index to revisions
3943+
# DB table for this.
3944+
# - NOTE: Trying to store all (commit,file) keys in DB [to
3945+
# find "lastModfiedCommit] (instead of
3946+
# just files that changed in each commit as we do now) is
3947+
# probably not practical from a disk space perspective.
3948+
3949+
# Does the file exist in $revCommit?
3950+
# TODO: Include file hash in dirmap cache.
3951+
my($dirMap)=$self->getRevisionDirMap($revCommit);
3952+
my($dir,$file)=($filename=~m%^(?:(.*)/)?([^/]*$)%);
3953+
if(!defined($dir))
3954+
{
3955+
$dir="";
3956+
}
3957+
if( !defined($dirMap->{$dir}) ||
3958+
!defined($dirMap->{$dir}{$filename}) )
3959+
{
3960+
my($fileHash)="deleted";
3961+
3962+
my($retVal)={};
3963+
$retVal->{name}=$filename;
3964+
$retVal->{filehash}=$fileHash;
3965+
3966+
# not needed and difficult to compute:
3967+
$retVal->{revision}="0"; # $revision;
3968+
$retVal->{commithash}=$revCommit;
3969+
#$retVal->{author}=$commit->{author};
3970+
#$retVal->{modified}=convertToCvsDate($commit->{date});
3971+
#$retVal->{mode}=convertToDbMode($mode);
3972+
3973+
return $retVal;
3974+
}
3975+
3976+
my($fileHash)=safe_pipe_capture("git","rev-parse","$revCommit:$filename");
3977+
chomp $fileHash;
3978+
if(!($fileHash=~/^[0-9a-f]{40}$/))
3979+
{
3980+
die "Invalid fileHash '$fileHash' looking up"
3981+
." '$revCommit:$filename'\n";
3982+
}
3983+
3984+
# information about most recent commit to modify $filename:
3985+
open(my $gitLogPipe, '-|', 'git', 'rev-list',
3986+
'--max-count=1', '--pretty', '--parents',
3987+
$revCommit, '--', $filename)
3988+
or die "Cannot call git-rev-list: $!";
3989+
my @commits=readCommits($gitLogPipe);
3990+
close $gitLogPipe;
3991+
if(scalar(@commits)!=1)
3992+
{
3993+
die "Can't find most recent commit changing $filename\n";
3994+
}
3995+
my($commit)=$commits[0];
3996+
if( !defined($commit) || !defined($commit->{hash}) )
3997+
{
3998+
return undef;
3999+
}
4000+
4001+
# does this (commit,file) have a real assigned CVS revision number?
4002+
my $tablename_rev = $self->tablename("revision");
4003+
my $db_query;
4004+
$db_query = $self->{dbh}->prepare_cached(
4005+
"SELECT * FROM $tablename_rev WHERE name=? AND commithash=?",
4006+
{},1);
4007+
$db_query->execute($filename, $commit->{hash});
4008+
my($meta)=$db_query->fetchrow_hashref;
4009+
if($meta)
4010+
{
4011+
$meta->{revision} = "1.$meta->{revision}";
4012+
return $meta;
4013+
}
4014+
4015+
# fall back on special revision number
4016+
my($revision)=$commit->{hash};
4017+
$revision=~s/(..)/'.' . (hex($1)+100)/eg;
4018+
$revision="2.1.1.2000$revision";
4019+
4020+
# meta data about $filename:
4021+
open(my $filePipe, '-|', 'git', 'ls-tree', '-z',
4022+
$commit->{hash}, '--', $filename)
4023+
or die("Cannot call git-ls-tree : $!");
4024+
local $/ = "\0";
4025+
my $line;
4026+
$line=<$filePipe>;
4027+
if(defined(<$filePipe>))
4028+
{
4029+
die "Expected only a single file for git-ls-tree $filename\n";
4030+
}
4031+
close $filePipe;
4032+
4033+
chomp $line;
4034+
unless ( $line=~m/^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\t(.*)$/o )
4035+
{
4036+
die("Couldn't process git-ls-tree line : $line\n");
4037+
}
4038+
my ( $mode, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );
4039+
4040+
# save result:
4041+
my($retVal)={};
4042+
$retVal->{name}=$filename;
4043+
$retVal->{revision}=$revision;
4044+
$retVal->{filehash}=$fileHash;
4045+
$retVal->{commithash}=$revCommit;
4046+
$retVal->{author}=$commit->{author};
4047+
$retVal->{modified}=convertToCvsDate($commit->{date});
4048+
$retVal->{mode}=convertToDbMode($mode);
4049+
4050+
return $retVal;
4051+
}
4052+
4053+
=head2 lookupCommitRef
4054+
4055+
Convert tag/branch/abbreviation/etc into a commit sha1 hash. Caches
4056+
the result so looking it up again is fast.
4057+
4058+
=cut
4059+
4060+
sub lookupCommitRef
4061+
{
4062+
my $self = shift;
4063+
my $ref = shift;
4064+
4065+
my $commitHash = $self->{commitRefCache}{$ref};
4066+
if(defined($commitHash))
4067+
{
4068+
return $commitHash;
4069+
}
4070+
4071+
$commitHash=safe_pipe_capture("git","rev-parse","--verify","--quiet",
4072+
$self->unescapeRefName($ref));
4073+
$commitHash=~s/\s*$//;
4074+
if(!($commitHash=~/^[0-9a-f]{40}$/))
4075+
{
4076+
$commitHash=undef;
4077+
}
4078+
4079+
if( defined($commitHash) )
4080+
{
4081+
my $type=safe_pipe_capture("git","cat-file","-t",$commitHash);
4082+
if( ! ($type=~/^commit\s*$/ ) )
4083+
{
4084+
$commitHash=undef;
4085+
}
4086+
}
4087+
if(defined($commitHash))
4088+
{
4089+
$self->{commitRefCache}{$ref}=$commitHash;
4090+
}
4091+
return $commitHash;
4092+
}
4093+
4094+
=head2 clearCommitRefCaches
4095+
4096+
Clears cached commit cache (sha1's for various tags/abbeviations/etc),
4097+
and related caches.
4098+
4099+
=cut
4100+
4101+
sub clearCommitRefCaches
4102+
{
4103+
my $self = shift;
4104+
$self->{commitRefCache} = {};
4105+
$self->{revisionDirMapCache} = undef;
4106+
$self->{gethead_cache} = undef;
4107+
}
4108+
37454109
=head2 commitmessage
37464110
37474111
this function takes a commithash and returns the commit message for that commit

0 commit comments

Comments
 (0)