Skip to content

Commit 9d458be

Browse files
authored
Merge pull request #196 from erizocosmico/perf/commit-has-tree
perf: add caches to commit_has_blob and commit_has_tree
2 parents a989023 + a252b53 commit 9d458be

File tree

4 files changed

+135
-10
lines changed

4 files changed

+135
-10
lines changed

internal/function/commit_has_blob.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/hashicorp/golang-lru"
8+
79
"github.com/src-d/gitquery"
810
"gopkg.in/src-d/go-git.v4/plumbing"
911
"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -14,13 +16,18 @@ import (
1416
type CommitHasBlob struct {
1517
commitHash sql.Expression
1618
blob sql.Expression
19+
cache *lru.TwoQueueCache
1720
}
1821

22+
const commitHasBlobCacheSize = 200
23+
1924
// NewCommitHasBlob creates a new commit_has_blob function.
2025
func NewCommitHasBlob(commitHash, blob sql.Expression) sql.Expression {
26+
cache, _ := lru.New2Q(commitHasBlobCacheSize)
2127
return &CommitHasBlob{
2228
commitHash: commitHash,
2329
blob: blob,
30+
cache: cache,
2431
}
2532
}
2633

@@ -71,10 +78,18 @@ func (f *CommitHasBlob) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
7178
)
7279
}
7380

81+
type commitBlobKey struct {
82+
commit, blob plumbing.Hash
83+
}
84+
7485
func (f *CommitHasBlob) commitHasBlob(
7586
pool *gitquery.RepositoryPool,
7687
commitHash, blob plumbing.Hash,
7788
) (bool, error) {
89+
if val, ok := f.cache.Get(commitBlobKey{commitHash, blob}); ok {
90+
return val.(bool), nil
91+
}
92+
7893
iter, err := pool.RepoIter()
7994
if err != nil {
8095
return false, err
@@ -105,23 +120,29 @@ func (f *CommitHasBlob) commitHasBlob(
105120
return false, err
106121
}
107122

108-
contained, err := hashInTree(blob, tree)
123+
contained, err := f.hashInTree(blob, commitHash, tree)
109124
if err != nil {
110125
return false, err
111126
}
112127

113128
if contained {
114129
return true, nil
115130
}
131+
f.cache.Add(commitBlobKey{commitHash, blob}, false)
116132
}
117133

118134
return false, nil
119135
}
120136

121-
func hashInTree(hash plumbing.Hash, tree *object.Tree) (bool, error) {
137+
func (f *CommitHasBlob) hashInTree(
138+
hash plumbing.Hash,
139+
commit plumbing.Hash,
140+
tree *object.Tree,
141+
) (bool, error) {
122142
var contained bool
123-
err := tree.Files().ForEach(func(f *object.File) error {
124-
if f.Blob.Hash == hash {
143+
err := tree.Files().ForEach(func(fi *object.File) error {
144+
f.cache.Add(commitBlobKey{commit, fi.Blob.Hash}, true)
145+
if fi.Blob.Hash == hash {
125146
contained = true
126147
return io.EOF
127148
}

internal/function/commit_has_blob_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,44 @@ func TestCommitHasBlob(t *testing.T) {
5555
})
5656
}
5757
}
58+
59+
func BenchmarkCommitHasBlob(b *testing.B) {
60+
require.NoError(b, fixtures.Init())
61+
defer func() {
62+
require.NoError(b, fixtures.Clean())
63+
}()
64+
65+
f := NewCommitHasBlob(
66+
expression.NewGetField(0, sql.Text, "commit_hash", true),
67+
expression.NewGetField(1, sql.Text, "blob_hash", true),
68+
)
69+
70+
pool := gitquery.NewRepositoryPool()
71+
for _, f := range fixtures.ByTag("worktree") {
72+
pool.AddGit(f.Worktree().Root())
73+
}
74+
75+
session := gitquery.NewSession(&pool)
76+
ctx := sql.NewContext(context.TODO(), session)
77+
78+
rows := []sql.Row{
79+
// blob is not on commit
80+
sql.NewRow("35e85108805c84807bc66a02d91535e1e24b38b9", "9dea2395f5403188298c1dabe8bdafe562c491e3"),
81+
// blob is on commit
82+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "9dea2395f5403188298c1dabe8bdafe562c491e3"),
83+
}
84+
85+
b.Run("commit_has_blob", func(b *testing.B) {
86+
require := require.New(b)
87+
88+
for i := 0; i < b.N; i++ {
89+
val, err := f.Eval(ctx, rows[i%2])
90+
require.NoError(err)
91+
if i%2 == 1 {
92+
require.Equal(true, val)
93+
} else {
94+
require.Equal(false, val)
95+
}
96+
}
97+
})
98+
}

internal/function/commit_has_tree.go

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/hashicorp/golang-lru"
8+
79
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
810
"gopkg.in/src-d/go-git.v4/plumbing/object"
911

@@ -18,14 +20,21 @@ import (
1820
// or not.
1921
type CommitHasTree struct {
2022
expression.BinaryExpression
23+
cache *lru.TwoQueueCache
2124
}
2225

26+
// TODO: make the cache size configurable instead of hard-coding it.
27+
const commitHasTreeCacheSize = 100
28+
2329
// NewCommitHasTree creates a new CommitHasTree function.
2430
func NewCommitHasTree(commit, tree sql.Expression) sql.Expression {
31+
// New2Q can only fail if size is not positive, and we know it is positive,
32+
// so it is safe to ignore the error here.
33+
cache, _ := lru.New2Q(commitHasTreeCacheSize)
2534
return &CommitHasTree{expression.BinaryExpression{
2635
Left: commit,
2736
Right: tree,
28-
}}
37+
}, cache}
2938
}
3039

3140
func (f CommitHasTree) String() string {
@@ -73,6 +82,10 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
7382
commitHash := plumbing.NewHash(left.(string))
7483
treeHash := plumbing.NewHash(right.(string))
7584

85+
if val, ok := f.cache.Get(commitTreeKey{commitHash, treeHash}); ok {
86+
return val.(bool), nil
87+
}
88+
7689
iter, err := s.Pool.RepoIter()
7790
if err != nil {
7891
return nil, err
@@ -88,7 +101,7 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
88101
return nil, err
89102
}
90103

91-
ok, err := commitHasTree(repo.Repo, commitHash, treeHash)
104+
ok, err := f.commitHasTree(repo.Repo, commitHash, treeHash)
92105
if err == plumbing.ErrObjectNotFound {
93106
continue
94107
}
@@ -97,7 +110,12 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
97110
}
98111
}
99112

100-
func commitHasTree(
113+
type commitTreeKey struct {
114+
commit plumbing.Hash
115+
tree plumbing.Hash
116+
}
117+
118+
func (f *CommitHasTree) commitHasTree(
101119
repo *git.Repository,
102120
commitHash, treeHash plumbing.Hash,
103121
) (bool, error) {
@@ -106,6 +124,8 @@ func commitHasTree(
106124
return false, err
107125
}
108126

127+
f.cache.Add(commitTreeKey{commitHash, commit.TreeHash}, true)
128+
109129
if commit.TreeHash == treeHash {
110130
return true, nil
111131
}
@@ -115,13 +135,13 @@ func commitHasTree(
115135
return false, err
116136
}
117137

118-
return treeInEntries(repo, tree.Entries, treeHash)
138+
return f.treeInEntries(repo, tree.Entries, commitHash, treeHash)
119139
}
120140

121-
func treeInEntries(
141+
func (f *CommitHasTree) treeInEntries(
122142
repo *git.Repository,
123143
entries []object.TreeEntry,
124-
hash plumbing.Hash,
144+
commitHash, hash plumbing.Hash,
125145
) (bool, error) {
126146
type stackFrame struct {
127147
pos int
@@ -131,6 +151,7 @@ func treeInEntries(
131151

132152
for {
133153
if len(stack) == 0 {
154+
f.cache.Add(commitTreeKey{commitHash, hash}, false)
134155
return false, nil
135156
}
136157

@@ -143,6 +164,7 @@ func treeInEntries(
143164
entry := frame.entries[frame.pos]
144165
frame.pos++
145166
if entry.Mode == filemode.Dir {
167+
f.cache.Add(commitTreeKey{commitHash, entry.Hash}, true)
146168
if entry.Hash == hash {
147169
return true, nil
148170
}

internal/function/commit_has_tree_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,44 @@ func TestCommitHasTree(t *testing.T) {
5656
})
5757
}
5858
}
59+
60+
func BenchmarkCommitHasTree(b *testing.B) {
61+
require.NoError(b, fixtures.Init())
62+
defer func() {
63+
require.NoError(b, fixtures.Clean())
64+
}()
65+
66+
f := NewCommitHasTree(
67+
expression.NewGetField(0, sql.Text, "commit_hash", true),
68+
expression.NewGetField(1, sql.Text, "tree_hash", true),
69+
)
70+
71+
pool := gitquery.NewRepositoryPool()
72+
for _, f := range fixtures.ByTag("worktree") {
73+
pool.AddGit(f.Worktree().Root())
74+
}
75+
76+
session := gitquery.NewSession(&pool)
77+
ctx := sql.NewContext(context.TODO(), session)
78+
79+
rows := []sql.Row{
80+
// tree is not on commit
81+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "c2d30fa8ef288618f65f6eed6e168e0d514886f4"),
82+
// subtree is on commit
83+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "5a877e6a906a2743ad6e45d99c1793642aaf8eda"),
84+
}
85+
86+
b.Run("commit_has_tree", func(b *testing.B) {
87+
require := require.New(b)
88+
89+
for i := 0; i < b.N; i++ {
90+
val, err := f.Eval(ctx, rows[i%2])
91+
require.NoError(err)
92+
if i%2 == 1 {
93+
require.Equal(true, val)
94+
} else {
95+
require.Equal(false, val)
96+
}
97+
}
98+
})
99+
}

0 commit comments

Comments
 (0)