Skip to content

Commit 730e554

Browse files
committed
initial code commit 🎉
1 parent 06721df commit 730e554

File tree

5 files changed

+401
-0
lines changed

5 files changed

+401
-0
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
## gitpert
2+
3+
`gitpert` measures the "pertinence" of git authors as a time-decayed measure of the lines of code (LOC) they have added to and removed from a repository (or a set of files in a repository).
4+
It's meant to help identify who the most relevant contributors are based on commit recency, frequency and impact.
5+
6+
- **impact** in this context is lines of code added plus lines of code removed by a commit. Vendored dependency files are ignored (on a best effort basis).
7+
- **decay rate** determines how long it takes for the impact of a commit to halve, based on how recently the commit was made. If the decay rate is 10 days, a commit that added 100 lines of code, authored 10 days ago, will be scored at 50. It is a half-life, and can be supplied as a config parameter.
8+
- **score** is the sum of the time decayed impact of every commit in a repository, for a given author.
9+
10+
The net effect *should* be a ranked list of authors (contributors) where those who have more recently and more frequently contributed "larger" commits surface to the top. An author who committed the initial code many years ago (maybe higher impact) will likely rank lower than an author who has contributed less impactfully, but much more recently (depending on the decay rate and absolute numbers, of course).
11+
12+
This could be useful for identifying who the best person to review a new code change might be, or who the best person to ask questions or seek help from might be. Scores can be done at the repository level, and also for individual files (the most pertinent author for a repository might not be the most pertinent for a directory or file within that repository).
13+
14+
15+
### Installation
16+
17+
TODO
18+
19+
### Usage
20+
21+
TODO
22+
23+
### FAQ
24+
25+
#### What about git-blame?
26+
27+
`git-blame` will tell you about the last modification to lines in a file (the author and revision), and is certainly useful. This tool hopes to provide a higher level view of the net effect of authorship in a repository over time.
28+
29+
#### Why are changes to "vendored" dependencies ignored?
30+
31+
Authoring a commit that introduces a large diff because it adds or removes many dependencies (think the `vendor/` directory in golang projects), though in most contexts an important contribution, gives an outsized "impact" to that commit and author which doesn't necessarily reflect how well they "impact" the code of the project itself in that commit.
32+
33+
#### Should LOC added be weighed the same as LOC removed?
34+
35+
Maybe. This could be worth exposing as a config parameter. One could argue that a LOC added should weigh some amount more than a LOC removed.

cmd/root.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package cmd
2+
3+
import (
4+
"fmt"
5+
"log"
6+
"math"
7+
"os"
8+
"sort"
9+
"text/tabwriter"
10+
"time"
11+
12+
"github.com/go-git/go-git/v5"
13+
"github.com/go-git/go-git/v5/plumbing/object"
14+
"github.com/go-git/go-git/v5/storage/memory"
15+
"github.com/spf13/cobra"
16+
"github.com/src-d/enry/v2"
17+
)
18+
19+
// handleError terminates the program via log.Fatalln when err is non-nil.
// A nil err is a no-op, so callers can pass the result of any fallible call
// straight through without checking it first.
func handleError(err error) {
	if err == nil {
		return
	}
	log.Fatalln(err)
}
24+
25+
// Command-line flag values; they are bound to rootCmd's flags in init.
var (
26+
// remote holds the value of the --remote/-r flag. When true, the repository
// argument is cloned into memory instead of being opened from disk.
remote bool
27+
// decayDays holds the value of the --drop-off/-d flag (default 30). It is the
// number of days after which a commit's impact counts for half in the score.
decayDays int
28+
)
29+
30+
func init() {
31+
rootCmd.Flags().BoolVarP(&remote, "remote", "r", false, "whether or not this is a remote repository")
32+
rootCmd.Flags().IntVarP(&decayDays, "drop-off", "d", 30, "drop off duration in days")
33+
}
34+
35+
var rootCmd = &cobra.Command{
36+
Use: "gitpert",
37+
Short: "gitpert ranks committers ",
38+
Args: cobra.RangeArgs(0, 2),
39+
Run: func(cmd *cobra.Command, args []string) {
40+
41+
// if first argument exists, it's the repoPath
42+
var repoPath string
43+
if len(args) > 0 {
44+
repoPath = args[0]
45+
} else { // otherwise, use the working directory
46+
p, err := os.Getwd()
47+
handleError(err)
48+
repoPath = p
49+
}
50+
51+
var repo *git.Repository
52+
// if the remote flag is set, clone the repo (using repoPath) into memory
53+
if remote {
54+
r, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
55+
URL: repoPath,
56+
})
57+
handleError(err)
58+
repo = r
59+
} else { // otherwise, open the specified repo
60+
r, err := git.PlainOpen(repoPath)
61+
handleError(err)
62+
repo = r
63+
}
64+
65+
var fileName *string
66+
if len(args) > 1 {
67+
fileName = &args[1]
68+
}
69+
70+
// TODO (patrickdevivo) at some point this entire scoring logic should be brought out into a subpackage with some tests
71+
// this could also make it possibe for other projects to import the implementation.
72+
decayHours := 24 * decayDays
73+
// since := time.Now().Add(-(time.Duration(decayHours) * time.Hour * 10))
74+
commitIter, err := repo.Log(&git.LogOptions{
75+
Order: git.LogOrderCommitterTime,
76+
FileName: fileName,
77+
// Since: &since,
78+
})
79+
handleError(err)
80+
defer commitIter.Close()
81+
82+
type authorAggregate struct {
83+
email string
84+
name string
85+
commits []*object.Commit
86+
impact int
87+
score float64
88+
}
89+
authors := map[string]*authorAggregate{}
90+
var authorEmails []string
91+
commitIter.ForEach(func(commit *object.Commit) error {
92+
authorEmail := commit.Author.Email
93+
if _, ok := authors[authorEmail]; !ok {
94+
authors[authorEmail] = &authorAggregate{
95+
email: authorEmail,
96+
name: commit.Author.Name,
97+
commits: make([]*object.Commit, 0),
98+
}
99+
authorEmails = append(authorEmails, authorEmail)
100+
}
101+
102+
agg := authors[authorEmail]
103+
agg.commits = append(authors[authorEmail].commits, commit)
104+
105+
fileStats, err := commit.Stats()
106+
handleError(err)
107+
108+
var additions int
109+
var deletions int
110+
for _, stat := range fileStats {
111+
// ignore diffs in vendor files
112+
if enry.IsVendor(stat.Name) {
113+
continue
114+
}
115+
additions += stat.Addition
116+
deletions += stat.Deletion
117+
}
118+
agg.impact += additions + deletions
119+
120+
hoursAgo := time.Now().Sub(commit.Author.When).Hours()
121+
agg.score += float64(additions+deletions) * math.Exp2(-hoursAgo/float64(decayHours))
122+
return nil
123+
})
124+
125+
sort.SliceStable(authorEmails, func(i, j int) bool {
126+
return authors[authorEmails[j]].score < authors[authorEmails[i]].score
127+
})
128+
129+
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', tabwriter.TabIndent)
130+
for rank, authorEmail := range authorEmails {
131+
agg := authors[authorEmail]
132+
if agg.score < 1 {
133+
continue
134+
}
135+
fmt.Fprintf(w, "%d\t%s\t%s\t%d\t%d commits\t%d\t%f\n", rank+1, authorEmail, agg.name, int(math.Round(agg.score)), len(agg.commits), agg.impact, float64(agg.impact)/agg.score)
136+
}
137+
w.Flush()
138+
},
139+
}
140+
141+
// Execute runs the root command
142+
func Execute() {
143+
if err := rootCmd.Execute(); err != nil {
144+
fmt.Println(err)
145+
os.Exit(1)
146+
}
147+
}

gitpert.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package main
2+
3+
import (
4+
"github.com/augmentable-dev/gitpert/cmd"
5+
)
6+
7+
// main is the program entry point; it delegates all work to cmd.Execute.
func main() {
8+
cmd.Execute()
9+
}

go.mod

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module github.com/augmentable-dev/gitpert
2+
3+
go 1.13
4+
5+
require (
6+
github.com/go-git/go-git/v5 v5.0.0
7+
github.com/spf13/cobra v0.0.7
8+
github.com/src-d/enry/v2 v2.1.0
9+
)

0 commit comments

Comments
 (0)