@@ -2986,6 +2986,9 @@ sub new
29862986
29872987 die " Git repo '$self ->{git_path}' doesn't exist" unless ( -d $self -> {git_path } );
29882988
2989+ # Stores full sha1's for various branch/tag names, abbreviations, etc:
2990+ $self -> {commitRefCache } = {};
2991+
29892992 $self -> {dbdriver } = $cfg -> {gitcvs }{$state -> {method }}{dbdriver } ||
29902993 $cfg -> {gitcvs }{dbdriver } || " SQLite" ;
29912994 $self -> {dbname } = $cfg -> {gitcvs }{$state -> {method }}{dbname } ||
@@ -3446,7 +3449,7 @@ sub update
34463449 );
34473450 }
34483451 # invalidate the gethead cache
3449- $self -> { gethead_cache } = undef ;
3452+ $self -> clearCommitRefCaches() ;
34503453
34513454
34523455 # Ending exclusive lock here
@@ -3648,6 +3651,169 @@ sub gethead
36483651 return $tree ;
36493652}
36503653
3654+ =head2 getAnyHead
3655+
3656+ Returns a reference to an array of getmeta structures, one
3657+ per file in the specified tree hash.
3658+
3659+ =cut
3660+
3661+ sub getAnyHead
3662+ {
3663+ my ($self ,$hash ) = @_ ;
3664+
3665+ if (!defined ($hash ))
3666+ {
3667+ return $self -> gethead();
3668+ }
3669+
3670+ my @files ;
3671+ {
3672+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' , ' -r' , $hash )
3673+ or die (" Cannot call git-ls-tree : $! " );
3674+ local $/ = " \0 " ;
3675+ @files =<$filePipe >;
3676+ close $filePipe ;
3677+ }
3678+
3679+ my $tree =[];
3680+ my ($line );
3681+ foreach $line (@files )
3682+ {
3683+ $line =~s /\0 $// ;
3684+ unless ( $line =~/ ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ /o )
3685+ {
3686+ die (" Couldn't process git-ls-tree line : $_ " );
3687+ }
3688+
3689+ my ($mode , $git_type , $git_hash , $git_filename ) = ($1 , $2 , $3 , $4 );
3690+ push @$tree , $self -> getMetaFromCommithash($git_filename ,$hash );
3691+ }
3692+
3693+ return $tree ;
3694+ }
3695+
3696+ =head2 getRevisionDirMap
3697+
3698+ A "revision dir map" contains all the plain-file filenames associated
3699+ with a particular revision (treeish), organized by directory:
3700+
3701+ $type = $out->{$dir}{$fullName}
3702+
3703+ The type of each is "F" (for ordinary file) or "D" (for directory,
3704+ for which the map $out->{$fullName} will also exist).
3705+
3706+ =cut
3707+
3708+ sub getRevisionDirMap
3709+ {
3710+ my ($self ,$ver )=@_ ;
3711+
3712+ if (!defined ($self -> {revisionDirMapCache }))
3713+ {
3714+ $self -> {revisionDirMapCache }={};
3715+ }
3716+
3717+ # Get file list (previously cached results are dependent on HEAD,
3718+ # but are early in each case):
3719+ my $cacheKey ;
3720+ my (@fileList );
3721+ if ( !defined ($ver ) || $ver eq " " )
3722+ {
3723+ $cacheKey =" " ;
3724+ if ( defined ($self -> {revisionDirMapCache }{$cacheKey }) )
3725+ {
3726+ return $self -> {revisionDirMapCache }{$cacheKey };
3727+ }
3728+
3729+ my @head = @{$self -> gethead()};
3730+ foreach my $file ( @head )
3731+ {
3732+ next if ( $file -> {filehash } eq " deleted" );
3733+
3734+ push @fileList ,$file -> {name };
3735+ }
3736+ }
3737+ else
3738+ {
3739+ my ($hash )=$self -> lookupCommitRef($ver );
3740+ if ( !defined ($hash ) )
3741+ {
3742+ return undef ;
3743+ }
3744+
3745+ $cacheKey =$hash ;
3746+ if ( defined ($self -> {revisionDirMapCache }{$cacheKey }) )
3747+ {
3748+ return $self -> {revisionDirMapCache }{$cacheKey };
3749+ }
3750+
3751+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' , ' -r' , $hash )
3752+ or die (" Cannot call git-ls-tree : $! " );
3753+ local $/ = " \0 " ;
3754+ while ( <$filePipe > )
3755+ {
3756+ chomp ;
3757+ unless ( / ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ /o )
3758+ {
3759+ die (" Couldn't process git-ls-tree line : $_ " );
3760+ }
3761+
3762+ my ($mode , $git_type , $git_hash , $git_filename ) = ($1 , $2 , $3 , $4 );
3763+
3764+ push @fileList , $git_filename ;
3765+ }
3766+ close $filePipe ;
3767+ }
3768+
3769+ # Convert to normalized form:
3770+ my %revMap ;
3771+ my $file ;
3772+ foreach $file (@fileList )
3773+ {
3774+ my ($dir ) = ($file =~m % ^(?:(.*)/)?([^/]*)$ % );
3775+ $dir =' ' if (!defined ($dir ));
3776+
3777+ # parent directories:
3778+ # ... create empty dir maps for parent dirs:
3779+ my ($td )=$dir ;
3780+ while (!defined ($revMap {$td }))
3781+ {
3782+ $revMap {$td }={};
3783+
3784+ my ($tp )=($td =~m % ^(?:(.*)/)?([^/]*)$ % );
3785+ $tp =' ' if (!defined ($tp ));
3786+ $td =$tp ;
3787+ }
3788+ # ... add children to parent maps (now that they exist):
3789+ $td =$dir ;
3790+ while ($td ne " " )
3791+ {
3792+ my ($tp )=($td =~m % ^(?:(.*)/)?([^/]*)$ % );
3793+ $tp =' ' if (!defined ($tp ));
3794+
3795+ if (defined ($revMap {$tp }{$td }))
3796+ {
3797+ if ($revMap {$tp }{$td } ne ' D' )
3798+ {
3799+ die " Weird file/directory inconsistency in $cacheKey " ;
3800+ }
3801+ last ; # loop exit
3802+ }
3803+ $revMap {$tp }{$td }=' D' ;
3804+
3805+ $td =$tp ;
3806+ }
3807+
3808+ # file
3809+ $revMap {$dir }{$file }=' F' ;
3810+ }
3811+
3812+ # Save in cache:
3813+ $self -> {revisionDirMapCache }{$cacheKey }=\%revMap ;
3814+ return $self -> {revisionDirMapCache }{$cacheKey };
3815+ }
3816+
36513817=head2 getlog
36523818
36533819See also gethistorydense().
@@ -3742,6 +3908,204 @@ sub getmeta
37423908 return $meta ;
37433909}
37443910
3911+ sub getMetaFromCommithash
3912+ {
3913+ my $self = shift ;
3914+ my $filename = shift ;
3915+ my $revCommit = shift ;
3916+
3917+ # NOTE: This function doesn't scale well (lots of forks), especially
3918+ # if you have many files that have not been modified for many commits
3919+ # (each git-rev-parse redoes a lot of work for each file
3920+ # that theoretically could be done in parallel by smarter
3921+ # graph traversal).
3922+ #
3923+ # TODO: Possible optimization strategies:
3924+ # - Solve the issue of assigning and remembering "real" CVS
3925+ # revision numbers for branches, and ensure the
3926+ # data structure can do this efficiently. Perhaps something
3927+ # similar to "git notes", and carefully structured to take
3928+ # advantage same-sha1-is-same-contents, to roll the same
3929+ # unmodified subdirectory data onto multiple commits?
3930+ # - Write and use a C tool that is like git-blame, but
3931+ # operates on multiple files with file granularity, instead
3932+ # of one file with line granularity. Cache
3933+ # most-recently-modified in $self->{commitRefCache}{$revCommit}.
3934+ # Try to be intelligent about how many files we do with
3935+ # one fork (perhaps one directory at a time, without recursion,
3936+ # and/or include directory as one line item, recurse from here
3937+ # instead of in C tool?).
3938+ # - Perhaps we could ask the DB for (filename,fileHash),
3939+ # and just guess that it is correct (that the file hadn't
3940+ # changed between $revCommit and the found commit, then
3941+ # changed back, confusing anything trying to interpret
3942+ # history). Probably need to add another index to revisions
3943+ # DB table for this.
3944+ # - NOTE: Trying to store all (commit,file) keys in DB [to
3945+ # find "lastModfiedCommit] (instead of
3946+ # just files that changed in each commit as we do now) is
3947+ # probably not practical from a disk space perspective.
3948+
3949+ # Does the file exist in $revCommit?
3950+ # TODO: Include file hash in dirmap cache.
3951+ my ($dirMap )=$self -> getRevisionDirMap($revCommit );
3952+ my ($dir ,$file )=($filename =~m % ^(?:(.*)/)?([^/]*$ )% );
3953+ if (!defined ($dir ))
3954+ {
3955+ $dir =" " ;
3956+ }
3957+ if ( !defined ($dirMap -> {$dir }) ||
3958+ !defined ($dirMap -> {$dir }{$filename }) )
3959+ {
3960+ my ($fileHash )=" deleted" ;
3961+
3962+ my ($retVal )={};
3963+ $retVal -> {name }=$filename ;
3964+ $retVal -> {filehash }=$fileHash ;
3965+
3966+ # not needed and difficult to compute:
3967+ $retVal -> {revision }=" 0" ; # $revision;
3968+ $retVal -> {commithash }=$revCommit ;
3969+ # $retVal->{author}=$commit->{author};
3970+ # $retVal->{modified}=convertToCvsDate($commit->{date});
3971+ # $retVal->{mode}=convertToDbMode($mode);
3972+
3973+ return $retVal ;
3974+ }
3975+
3976+ my ($fileHash )=safe_pipe_capture(" git" ," rev-parse" ," $revCommit :$filename " );
3977+ chomp $fileHash ;
3978+ if (!($fileHash =~/ ^[0-9a-f]{40}$ / ))
3979+ {
3980+ die " Invalid fileHash '$fileHash ' looking up"
3981+ ." '$revCommit :$filename '\n " ;
3982+ }
3983+
3984+ # information about most recent commit to modify $filename:
3985+ open (my $gitLogPipe , ' -|' , ' git' , ' rev-list' ,
3986+ ' --max-count=1' , ' --pretty' , ' --parents' ,
3987+ $revCommit , ' --' , $filename )
3988+ or die " Cannot call git-rev-list: $! " ;
3989+ my @commits =readCommits($gitLogPipe );
3990+ close $gitLogPipe ;
3991+ if (scalar (@commits )!=1)
3992+ {
3993+ die " Can't find most recent commit changing $filename \n " ;
3994+ }
3995+ my ($commit )=$commits [0];
3996+ if ( !defined ($commit ) || !defined ($commit -> {hash }) )
3997+ {
3998+ return undef ;
3999+ }
4000+
4001+ # does this (commit,file) have a real assigned CVS revision number?
4002+ my $tablename_rev = $self -> tablename(" revision" );
4003+ my $db_query ;
4004+ $db_query = $self -> {dbh }-> prepare_cached(
4005+ " SELECT * FROM $tablename_rev WHERE name=? AND commithash=?" ,
4006+ {},1);
4007+ $db_query -> execute($filename , $commit -> {hash });
4008+ my ($meta )=$db_query -> fetchrow_hashref;
4009+ if ($meta )
4010+ {
4011+ $meta -> {revision } = " 1.$meta ->{revision}" ;
4012+ return $meta ;
4013+ }
4014+
4015+ # fall back on special revision number
4016+ my ($revision )=$commit -> {hash };
4017+ $revision =~s / (..)/ '.' . (hex($1 )+100)/ eg ;
4018+ $revision =" 2.1.1.2000$revision " ;
4019+
4020+ # meta data about $filename:
4021+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' ,
4022+ $commit -> {hash }, ' --' , $filename )
4023+ or die (" Cannot call git-ls-tree : $! " );
4024+ local $/ = " \0 " ;
4025+ my $line ;
4026+ $line =<$filePipe >;
4027+ if (defined (<$filePipe >))
4028+ {
4029+ die " Expected only a single file for git-ls-tree $filename \n " ;
4030+ }
4031+ close $filePipe ;
4032+
4033+ chomp $line ;
4034+ unless ( $line =~m / ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ / o )
4035+ {
4036+ die (" Couldn't process git-ls-tree line : $line \n " );
4037+ }
4038+ my ( $mode , $git_type , $git_hash , $git_filename ) = ( $1 , $2 , $3 , $4 );
4039+
4040+ # save result:
4041+ my ($retVal )={};
4042+ $retVal -> {name }=$filename ;
4043+ $retVal -> {revision }=$revision ;
4044+ $retVal -> {filehash }=$fileHash ;
4045+ $retVal -> {commithash }=$revCommit ;
4046+ $retVal -> {author }=$commit -> {author };
4047+ $retVal -> {modified }=convertToCvsDate($commit -> {date });
4048+ $retVal -> {mode }=convertToDbMode($mode );
4049+
4050+ return $retVal ;
4051+ }
4052+
4053+ =head2 lookupCommitRef
4054+
4055+ Convert tag/branch/abbreviation/etc into a commit sha1 hash. Caches
4056+ the result so looking it up again is fast.
4057+
4058+ =cut
4059+
4060+ sub lookupCommitRef
4061+ {
4062+ my $self = shift ;
4063+ my $ref = shift ;
4064+
4065+ my $commitHash = $self -> {commitRefCache }{$ref };
4066+ if (defined ($commitHash ))
4067+ {
4068+ return $commitHash ;
4069+ }
4070+
4071+ $commitHash =safe_pipe_capture(" git" ," rev-parse" ," --verify" ," --quiet" ,
4072+ $self -> unescapeRefName($ref ));
4073+ $commitHash =~s /\s *$// ;
4074+ if (!($commitHash =~/ ^[0-9a-f]{40}$ / ))
4075+ {
4076+ $commitHash =undef ;
4077+ }
4078+
4079+ if ( defined ($commitHash ) )
4080+ {
4081+ my $type =safe_pipe_capture(" git" ," cat-file" ," -t" ,$commitHash );
4082+ if ( ! ($type =~/ ^commit\s *$ / ) )
4083+ {
4084+ $commitHash =undef ;
4085+ }
4086+ }
4087+ if (defined ($commitHash ))
4088+ {
4089+ $self -> {commitRefCache }{$ref }=$commitHash ;
4090+ }
4091+ return $commitHash ;
4092+ }
4093+
4094+ =head2 clearCommitRefCaches
4095+
4096+ Clears cached commit cache (sha1's for various tags/abbeviations/etc),
4097+ and related caches.
4098+
4099+ =cut
4100+
4101+ sub clearCommitRefCaches
4102+ {
4103+ my $self = shift ;
4104+ $self -> {commitRefCache } = {};
4105+ $self -> {revisionDirMapCache } = undef ;
4106+ $self -> {gethead_cache } = undef ;
4107+ }
4108+
37454109=head2 commitmessage
37464110
37474111this function takes a commithash and returns the commit message for that commit
0 commit comments