Skip to content

Commit bffd776

Browse files
committed
internal/function: add cache to commit_has_tree
Adds an LRU cache to commit_has_tree so it does not have to go to disk every time to check whether a tree is inside a commit.

Before:

BenchmarkCommitHasTree/commit_has_tree-4    3000    519234 ns/op    297322 B/op    1276 allocs/op
PASS
ok  github.com/src-d/gitquery/internal/function  2.154s

After:

BenchmarkCommitHasTree/commit_has_tree-4    1000000    1036 ns/op    240 B/op    7 allocs/op
PASS
ok  github.com/src-d/gitquery/internal/function  1.593s

Signed-off-by: Miguel Molina <[email protected]>
1 parent 6513ca1 commit bffd776

File tree

2 files changed

+69
-6
lines changed

2 files changed

+69
-6
lines changed

internal/function/commit_has_tree.go

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/hashicorp/golang-lru"
8+
79
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
810
"gopkg.in/src-d/go-git.v4/plumbing/object"
911

@@ -18,14 +20,21 @@ import (
1820
// or not.
1921
type CommitHasTree struct {
2022
expression.BinaryExpression
23+
cache *lru.ARCCache
2124
}
2225

26+
// TODO: set as config
27+
const commitHasTreeCacheSize = 100
28+
2329
// NewCommitHasTree creates a new CommitHasTree function.
2430
func NewCommitHasTree(commit, tree sql.Expression) sql.Expression {
31+
// NewARC can only fail if size is not positive, and we know it is positive,
32+
// so it is safe to ignore the error here.
33+
cache, _ := lru.NewARC(commitHasTreeCacheSize)
2534
return &CommitHasTree{expression.BinaryExpression{
2635
Left: commit,
2736
Right: tree,
28-
}}
37+
}, cache}
2938
}
3039

3140
func (f CommitHasTree) String() string {
@@ -73,6 +82,10 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
7382
commitHash := plumbing.NewHash(left.(string))
7483
treeHash := plumbing.NewHash(right.(string))
7584

85+
if val, ok := f.cache.Get(cacheKey{commitHash, treeHash}); ok {
86+
return val.(bool), nil
87+
}
88+
7689
iter, err := s.Pool.RepoIter()
7790
if err != nil {
7891
return nil, err
@@ -88,7 +101,7 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
88101
return nil, err
89102
}
90103

91-
ok, err := commitHasTree(repo.Repo, commitHash, treeHash)
104+
ok, err := f.commitHasTree(repo.Repo, commitHash, treeHash)
92105
if err == plumbing.ErrObjectNotFound {
93106
continue
94107
}
@@ -97,7 +110,12 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
97110
}
98111
}
99112

100-
func commitHasTree(
113+
type cacheKey struct {
114+
commit plumbing.Hash
115+
tree plumbing.Hash
116+
}
117+
118+
func (f *CommitHasTree) commitHasTree(
101119
repo *git.Repository,
102120
commitHash, treeHash plumbing.Hash,
103121
) (bool, error) {
@@ -106,6 +124,8 @@ func commitHasTree(
106124
return false, err
107125
}
108126

127+
f.cache.Add(cacheKey{commitHash, commit.TreeHash}, true)
128+
109129
if commit.TreeHash == treeHash {
110130
return true, nil
111131
}
@@ -115,13 +135,13 @@ func commitHasTree(
115135
return false, err
116136
}
117137

118-
return treeInEntries(repo, tree.Entries, treeHash)
138+
return f.treeInEntries(repo, tree.Entries, commitHash, treeHash)
119139
}
120140

121-
func treeInEntries(
141+
func (f *CommitHasTree) treeInEntries(
122142
repo *git.Repository,
123143
entries []object.TreeEntry,
124-
hash plumbing.Hash,
144+
commitHash, hash plumbing.Hash,
125145
) (bool, error) {
126146
type stackFrame struct {
127147
pos int
@@ -131,6 +151,7 @@ func treeInEntries(
131151

132152
for {
133153
if len(stack) == 0 {
154+
f.cache.Add(cacheKey{commitHash, hash}, false)
134155
return false, nil
135156
}
136157

@@ -143,6 +164,7 @@ func treeInEntries(
143164
entry := frame.entries[frame.pos]
144165
frame.pos++
145166
if entry.Mode == filemode.Dir {
167+
f.cache.Add(cacheKey{commitHash, entry.Hash}, true)
146168
if entry.Hash == hash {
147169
return true, nil
148170
}

internal/function/commit_has_tree_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,44 @@ func TestCommitHasTree(t *testing.T) {
5656
})
5757
}
5858
}
59+
60+
func BenchmarkCommitHasTree(b *testing.B) {
61+
require.NoError(b, fixtures.Init())
62+
defer func() {
63+
require.NoError(b, fixtures.Clean())
64+
}()
65+
66+
f := NewCommitHasTree(
67+
expression.NewGetField(0, sql.Text, "commit_hash", true),
68+
expression.NewGetField(1, sql.Text, "tree_hash", true),
69+
)
70+
71+
pool := gitquery.NewRepositoryPool()
72+
for _, f := range fixtures.ByTag("worktree") {
73+
pool.AddGit(f.Worktree().Root())
74+
}
75+
76+
session := gitquery.NewSession(&pool)
77+
ctx := sql.NewContext(context.TODO(), session)
78+
79+
rows := []sql.Row{
80+
// tree is not on commit
81+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "c2d30fa8ef288618f65f6eed6e168e0d514886f4"),
82+
// subtree is on commit
83+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "5a877e6a906a2743ad6e45d99c1793642aaf8eda"),
84+
}
85+
86+
b.Run("commit_has_tree", func(b *testing.B) {
87+
require := require.New(b)
88+
89+
for i := 0; i < b.N; i++ {
90+
val, err := f.Eval(ctx, rows[i%2])
91+
require.NoError(err)
92+
if i%2 == 1 {
93+
require.Equal(true, val)
94+
} else {
95+
require.Equal(false, val)
96+
}
97+
}
98+
})
99+
}

0 commit comments

Comments
 (0)