@@ -2986,6 +2986,9 @@ sub new
2986
2986
2987
2987
die " Git repo '$self ->{git_path}' doesn't exist" unless ( -d $self -> {git_path } );
2988
2988
2989
+ # Stores full sha1's for various branch/tag names, abbreviations, etc:
2990
+ $self -> {commitRefCache } = {};
2991
+
2989
2992
$self -> {dbdriver } = $cfg -> {gitcvs }{$state -> {method }}{dbdriver } ||
2990
2993
$cfg -> {gitcvs }{dbdriver } || " SQLite" ;
2991
2994
$self -> {dbname } = $cfg -> {gitcvs }{$state -> {method }}{dbname } ||
@@ -3446,7 +3449,7 @@ sub update
3446
3449
);
3447
3450
}
3448
3451
# invalidate the gethead cache
3449
- $self -> { gethead_cache } = undef ;
3452
+ $self -> clearCommitRefCaches() ;
3450
3453
3451
3454
3452
3455
# Ending exclusive lock here
@@ -3648,6 +3651,169 @@ sub gethead
3648
3651
return $tree ;
3649
3652
}
3650
3653
3654
+ =head2 getAnyHead
3655
+
3656
+ Returns a reference to an array of getmeta structures, one
3657
+ per file in the specified tree hash.
3658
+
3659
+ =cut
3660
+
3661
+ sub getAnyHead
3662
+ {
3663
+ my ($self ,$hash ) = @_ ;
3664
+
3665
+ if (!defined ($hash ))
3666
+ {
3667
+ return $self -> gethead();
3668
+ }
3669
+
3670
+ my @files ;
3671
+ {
3672
+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' , ' -r' , $hash )
3673
+ or die (" Cannot call git-ls-tree : $! " );
3674
+ local $/ = " \0 " ;
3675
+ @files =<$filePipe >;
3676
+ close $filePipe ;
3677
+ }
3678
+
3679
+ my $tree =[];
3680
+ my ($line );
3681
+ foreach $line (@files )
3682
+ {
3683
+ $line =~s /\0 $// ;
3684
+ unless ( $line =~/ ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ /o )
3685
+ {
3686
+ die (" Couldn't process git-ls-tree line : $_ " );
3687
+ }
3688
+
3689
+ my ($mode , $git_type , $git_hash , $git_filename ) = ($1 , $2 , $3 , $4 );
3690
+ push @$tree , $self -> getMetaFromCommithash($git_filename ,$hash );
3691
+ }
3692
+
3693
+ return $tree ;
3694
+ }
3695
+
3696
+ =head2 getRevisionDirMap
3697
+
3698
+ A "revision dir map" contains all the plain-file filenames associated
3699
+ with a particular revision (treeish), organized by directory:
3700
+
3701
+ $type = $out->{$dir}{$fullName}
3702
+
3703
+ The type of each is "F" (for ordinary file) or "D" (for directory,
3704
+ for which the map $out->{$fullName} will also exist).
3705
+
3706
+ =cut
3707
+
3708
+ sub getRevisionDirMap
3709
+ {
3710
+ my ($self ,$ver )=@_ ;
3711
+
3712
+ if (!defined ($self -> {revisionDirMapCache }))
3713
+ {
3714
+ $self -> {revisionDirMapCache }={};
3715
+ }
3716
+
3717
+ # Get file list (previously cached results are dependent on HEAD,
3718
+ # but are early in each case):
3719
+ my $cacheKey ;
3720
+ my (@fileList );
3721
+ if ( !defined ($ver ) || $ver eq " " )
3722
+ {
3723
+ $cacheKey =" " ;
3724
+ if ( defined ($self -> {revisionDirMapCache }{$cacheKey }) )
3725
+ {
3726
+ return $self -> {revisionDirMapCache }{$cacheKey };
3727
+ }
3728
+
3729
+ my @head = @{$self -> gethead()};
3730
+ foreach my $file ( @head )
3731
+ {
3732
+ next if ( $file -> {filehash } eq " deleted" );
3733
+
3734
+ push @fileList ,$file -> {name };
3735
+ }
3736
+ }
3737
+ else
3738
+ {
3739
+ my ($hash )=$self -> lookupCommitRef($ver );
3740
+ if ( !defined ($hash ) )
3741
+ {
3742
+ return undef ;
3743
+ }
3744
+
3745
+ $cacheKey =$hash ;
3746
+ if ( defined ($self -> {revisionDirMapCache }{$cacheKey }) )
3747
+ {
3748
+ return $self -> {revisionDirMapCache }{$cacheKey };
3749
+ }
3750
+
3751
+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' , ' -r' , $hash )
3752
+ or die (" Cannot call git-ls-tree : $! " );
3753
+ local $/ = " \0 " ;
3754
+ while ( <$filePipe > )
3755
+ {
3756
+ chomp ;
3757
+ unless ( / ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ /o )
3758
+ {
3759
+ die (" Couldn't process git-ls-tree line : $_ " );
3760
+ }
3761
+
3762
+ my ($mode , $git_type , $git_hash , $git_filename ) = ($1 , $2 , $3 , $4 );
3763
+
3764
+ push @fileList , $git_filename ;
3765
+ }
3766
+ close $filePipe ;
3767
+ }
3768
+
3769
+ # Convert to normalized form:
3770
+ my %revMap ;
3771
+ my $file ;
3772
+ foreach $file (@fileList )
3773
+ {
3774
+ my ($dir ) = ($file =~m % ^(?:(.*)/)?([^/]*)$ % );
3775
+ $dir =' ' if (!defined ($dir ));
3776
+
3777
+ # parent directories:
3778
+ # ... create empty dir maps for parent dirs:
3779
+ my ($td )=$dir ;
3780
+ while (!defined ($revMap {$td }))
3781
+ {
3782
+ $revMap {$td }={};
3783
+
3784
+ my ($tp )=($td =~m % ^(?:(.*)/)?([^/]*)$ % );
3785
+ $tp =' ' if (!defined ($tp ));
3786
+ $td =$tp ;
3787
+ }
3788
+ # ... add children to parent maps (now that they exist):
3789
+ $td =$dir ;
3790
+ while ($td ne " " )
3791
+ {
3792
+ my ($tp )=($td =~m % ^(?:(.*)/)?([^/]*)$ % );
3793
+ $tp =' ' if (!defined ($tp ));
3794
+
3795
+ if (defined ($revMap {$tp }{$td }))
3796
+ {
3797
+ if ($revMap {$tp }{$td } ne ' D' )
3798
+ {
3799
+ die " Weird file/directory inconsistency in $cacheKey " ;
3800
+ }
3801
+ last ; # loop exit
3802
+ }
3803
+ $revMap {$tp }{$td }=' D' ;
3804
+
3805
+ $td =$tp ;
3806
+ }
3807
+
3808
+ # file
3809
+ $revMap {$dir }{$file }=' F' ;
3810
+ }
3811
+
3812
+ # Save in cache:
3813
+ $self -> {revisionDirMapCache }{$cacheKey }=\%revMap ;
3814
+ return $self -> {revisionDirMapCache }{$cacheKey };
3815
+ }
3816
+
3651
3817
=head2 getlog
3652
3818
3653
3819
See also gethistorydense().
@@ -3742,6 +3908,204 @@ sub getmeta
3742
3908
return $meta ;
3743
3909
}
3744
3910
3911
+ sub getMetaFromCommithash
3912
+ {
3913
+ my $self = shift ;
3914
+ my $filename = shift ;
3915
+ my $revCommit = shift ;
3916
+
3917
+ # NOTE: This function doesn't scale well (lots of forks), especially
3918
+ # if you have many files that have not been modified for many commits
3919
+ # (each git-rev-parse redoes a lot of work for each file
3920
+ # that theoretically could be done in parallel by smarter
3921
+ # graph traversal).
3922
+ #
3923
+ # TODO: Possible optimization strategies:
3924
+ # - Solve the issue of assigning and remembering "real" CVS
3925
+ # revision numbers for branches, and ensure the
3926
+ # data structure can do this efficiently. Perhaps something
3927
+ # similar to "git notes", and carefully structured to take
3928
+ # advantage same-sha1-is-same-contents, to roll the same
3929
+ # unmodified subdirectory data onto multiple commits?
3930
+ # - Write and use a C tool that is like git-blame, but
3931
+ # operates on multiple files with file granularity, instead
3932
+ # of one file with line granularity. Cache
3933
+ # most-recently-modified in $self->{commitRefCache}{$revCommit}.
3934
+ # Try to be intelligent about how many files we do with
3935
+ # one fork (perhaps one directory at a time, without recursion,
3936
+ # and/or include directory as one line item, recurse from here
3937
+ # instead of in C tool?).
3938
+ # - Perhaps we could ask the DB for (filename,fileHash),
3939
+ # and just guess that it is correct (that the file hadn't
3940
+ # changed between $revCommit and the found commit, then
3941
+ # changed back, confusing anything trying to interpret
3942
+ # history). Probably need to add another index to revisions
3943
+ # DB table for this.
3944
+ # - NOTE: Trying to store all (commit,file) keys in DB [to
3945
+ # find "lastModfiedCommit] (instead of
3946
+ # just files that changed in each commit as we do now) is
3947
+ # probably not practical from a disk space perspective.
3948
+
3949
+ # Does the file exist in $revCommit?
3950
+ # TODO: Include file hash in dirmap cache.
3951
+ my ($dirMap )=$self -> getRevisionDirMap($revCommit );
3952
+ my ($dir ,$file )=($filename =~m % ^(?:(.*)/)?([^/]*$ )% );
3953
+ if (!defined ($dir ))
3954
+ {
3955
+ $dir =" " ;
3956
+ }
3957
+ if ( !defined ($dirMap -> {$dir }) ||
3958
+ !defined ($dirMap -> {$dir }{$filename }) )
3959
+ {
3960
+ my ($fileHash )=" deleted" ;
3961
+
3962
+ my ($retVal )={};
3963
+ $retVal -> {name }=$filename ;
3964
+ $retVal -> {filehash }=$fileHash ;
3965
+
3966
+ # not needed and difficult to compute:
3967
+ $retVal -> {revision }=" 0" ; # $revision;
3968
+ $retVal -> {commithash }=$revCommit ;
3969
+ # $retVal->{author}=$commit->{author};
3970
+ # $retVal->{modified}=convertToCvsDate($commit->{date});
3971
+ # $retVal->{mode}=convertToDbMode($mode);
3972
+
3973
+ return $retVal ;
3974
+ }
3975
+
3976
+ my ($fileHash )=safe_pipe_capture(" git" ," rev-parse" ," $revCommit :$filename " );
3977
+ chomp $fileHash ;
3978
+ if (!($fileHash =~/ ^[0-9a-f]{40}$ / ))
3979
+ {
3980
+ die " Invalid fileHash '$fileHash ' looking up"
3981
+ ." '$revCommit :$filename '\n " ;
3982
+ }
3983
+
3984
+ # information about most recent commit to modify $filename:
3985
+ open (my $gitLogPipe , ' -|' , ' git' , ' rev-list' ,
3986
+ ' --max-count=1' , ' --pretty' , ' --parents' ,
3987
+ $revCommit , ' --' , $filename )
3988
+ or die " Cannot call git-rev-list: $! " ;
3989
+ my @commits =readCommits($gitLogPipe );
3990
+ close $gitLogPipe ;
3991
+ if (scalar (@commits )!=1)
3992
+ {
3993
+ die " Can't find most recent commit changing $filename \n " ;
3994
+ }
3995
+ my ($commit )=$commits [0];
3996
+ if ( !defined ($commit ) || !defined ($commit -> {hash }) )
3997
+ {
3998
+ return undef ;
3999
+ }
4000
+
4001
+ # does this (commit,file) have a real assigned CVS revision number?
4002
+ my $tablename_rev = $self -> tablename(" revision" );
4003
+ my $db_query ;
4004
+ $db_query = $self -> {dbh }-> prepare_cached(
4005
+ " SELECT * FROM $tablename_rev WHERE name=? AND commithash=?" ,
4006
+ {},1);
4007
+ $db_query -> execute($filename , $commit -> {hash });
4008
+ my ($meta )=$db_query -> fetchrow_hashref;
4009
+ if ($meta )
4010
+ {
4011
+ $meta -> {revision } = " 1.$meta ->{revision}" ;
4012
+ return $meta ;
4013
+ }
4014
+
4015
+ # fall back on special revision number
4016
+ my ($revision )=$commit -> {hash };
4017
+ $revision =~s / (..)/ '.' . (hex($1 )+100)/ eg ;
4018
+ $revision =" 2.1.1.2000$revision " ;
4019
+
4020
+ # meta data about $filename:
4021
+ open (my $filePipe , ' -|' , ' git' , ' ls-tree' , ' -z' ,
4022
+ $commit -> {hash }, ' --' , $filename )
4023
+ or die (" Cannot call git-ls-tree : $! " );
4024
+ local $/ = " \0 " ;
4025
+ my $line ;
4026
+ $line =<$filePipe >;
4027
+ if (defined (<$filePipe >))
4028
+ {
4029
+ die " Expected only a single file for git-ls-tree $filename \n " ;
4030
+ }
4031
+ close $filePipe ;
4032
+
4033
+ chomp $line ;
4034
+ unless ( $line =~m / ^(\d +)\s +(\w +)\s +([a-zA-Z0-9]+)\t (.*)$ / o )
4035
+ {
4036
+ die (" Couldn't process git-ls-tree line : $line \n " );
4037
+ }
4038
+ my ( $mode , $git_type , $git_hash , $git_filename ) = ( $1 , $2 , $3 , $4 );
4039
+
4040
+ # save result:
4041
+ my ($retVal )={};
4042
+ $retVal -> {name }=$filename ;
4043
+ $retVal -> {revision }=$revision ;
4044
+ $retVal -> {filehash }=$fileHash ;
4045
+ $retVal -> {commithash }=$revCommit ;
4046
+ $retVal -> {author }=$commit -> {author };
4047
+ $retVal -> {modified }=convertToCvsDate($commit -> {date });
4048
+ $retVal -> {mode }=convertToDbMode($mode );
4049
+
4050
+ return $retVal ;
4051
+ }
4052
+
4053
+ =head2 lookupCommitRef
4054
+
4055
+ Convert tag/branch/abbreviation/etc into a commit sha1 hash. Caches
4056
+ the result so looking it up again is fast.
4057
+
4058
+ =cut
4059
+
4060
+ sub lookupCommitRef
4061
+ {
4062
+ my $self = shift ;
4063
+ my $ref = shift ;
4064
+
4065
+ my $commitHash = $self -> {commitRefCache }{$ref };
4066
+ if (defined ($commitHash ))
4067
+ {
4068
+ return $commitHash ;
4069
+ }
4070
+
4071
+ $commitHash =safe_pipe_capture(" git" ," rev-parse" ," --verify" ," --quiet" ,
4072
+ $self -> unescapeRefName($ref ));
4073
+ $commitHash =~s /\s *$// ;
4074
+ if (!($commitHash =~/ ^[0-9a-f]{40}$ / ))
4075
+ {
4076
+ $commitHash =undef ;
4077
+ }
4078
+
4079
+ if ( defined ($commitHash ) )
4080
+ {
4081
+ my $type =safe_pipe_capture(" git" ," cat-file" ," -t" ,$commitHash );
4082
+ if ( ! ($type =~/ ^commit\s *$ / ) )
4083
+ {
4084
+ $commitHash =undef ;
4085
+ }
4086
+ }
4087
+ if (defined ($commitHash ))
4088
+ {
4089
+ $self -> {commitRefCache }{$ref }=$commitHash ;
4090
+ }
4091
+ return $commitHash ;
4092
+ }
4093
+
4094
+ =head2 clearCommitRefCaches
4095
+
4096
+ Clears cached commit cache (sha1's for various tags/abbeviations/etc),
4097
+ and related caches.
4098
+
4099
+ =cut
4100
+
4101
+ sub clearCommitRefCaches
4102
+ {
4103
+ my $self = shift ;
4104
+ $self -> {commitRefCache } = {};
4105
+ $self -> {revisionDirMapCache } = undef ;
4106
+ $self -> {gethead_cache } = undef ;
4107
+ }
4108
+
3745
4109
=head2 commitmessage
3746
4110
3747
4111
this function takes a commithash and returns the commit message for that commit
0 commit comments