Skip to content

Commit a252b53

Browse files
committed
internal/function: add cache to commit_has_blob
This commit adds an LRU cache to commit_has_blob, which avoids hitting disk many times, drastically improving the performance of the UDF.

Before:

BenchmarkCommitHasBlob/commit_has_blob-4 500 2605190 ns/op 1143268 B/op 1768 allocs/op
PASS
ok github.com/src-d/gitquery/internal/function 2.105s

After:

BenchmarkCommitHasBlob/commit_has_blob-4 1000000 1013 ns/op 240 B/op 7 allocs/op
PASS
ok github.com/src-d/gitquery/internal/function 1.559s

Signed-off-by: Miguel Molina <[email protected]>
1 parent bffd776 commit a252b53

File tree

3 files changed

+73
-11
lines changed

3 files changed

+73
-11
lines changed

internal/function/commit_has_blob.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/hashicorp/golang-lru"
8+
79
"github.com/src-d/gitquery"
810
"gopkg.in/src-d/go-git.v4/plumbing"
911
"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -14,13 +16,18 @@ import (
1416
type CommitHasBlob struct {
1517
commitHash sql.Expression
1618
blob sql.Expression
19+
cache *lru.TwoQueueCache
1720
}
1821

22+
const commitHasBlobCacheSize = 200
23+
1924
// NewCommitHasBlob creates a new commit_has_blob function.
2025
func NewCommitHasBlob(commitHash, blob sql.Expression) sql.Expression {
26+
cache, _ := lru.New2Q(commitHasBlobCacheSize)
2127
return &CommitHasBlob{
2228
commitHash: commitHash,
2329
blob: blob,
30+
cache: cache,
2431
}
2532
}
2633

@@ -71,10 +78,18 @@ func (f *CommitHasBlob) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
7178
)
7279
}
7380

81+
type commitBlobKey struct {
82+
commit, blob plumbing.Hash
83+
}
84+
7485
func (f *CommitHasBlob) commitHasBlob(
7586
pool *gitquery.RepositoryPool,
7687
commitHash, blob plumbing.Hash,
7788
) (bool, error) {
89+
if val, ok := f.cache.Get(commitBlobKey{commitHash, blob}); ok {
90+
return val.(bool), nil
91+
}
92+
7893
iter, err := pool.RepoIter()
7994
if err != nil {
8095
return false, err
@@ -105,23 +120,29 @@ func (f *CommitHasBlob) commitHasBlob(
105120
return false, err
106121
}
107122

108-
contained, err := hashInTree(blob, tree)
123+
contained, err := f.hashInTree(blob, commitHash, tree)
109124
if err != nil {
110125
return false, err
111126
}
112127

113128
if contained {
114129
return true, nil
115130
}
131+
f.cache.Add(commitBlobKey{commitHash, blob}, false)
116132
}
117133

118134
return false, nil
119135
}
120136

121-
func hashInTree(hash plumbing.Hash, tree *object.Tree) (bool, error) {
137+
func (f *CommitHasBlob) hashInTree(
138+
hash plumbing.Hash,
139+
commit plumbing.Hash,
140+
tree *object.Tree,
141+
) (bool, error) {
122142
var contained bool
123-
err := tree.Files().ForEach(func(f *object.File) error {
124-
if f.Blob.Hash == hash {
143+
err := tree.Files().ForEach(func(fi *object.File) error {
144+
f.cache.Add(commitBlobKey{commit, fi.Blob.Hash}, true)
145+
if fi.Blob.Hash == hash {
125146
contained = true
126147
return io.EOF
127148
}

internal/function/commit_has_blob_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,44 @@ func TestCommitHasBlob(t *testing.T) {
5555
})
5656
}
5757
}
58+
59+
func BenchmarkCommitHasBlob(b *testing.B) {
60+
require.NoError(b, fixtures.Init())
61+
defer func() {
62+
require.NoError(b, fixtures.Clean())
63+
}()
64+
65+
f := NewCommitHasBlob(
66+
expression.NewGetField(0, sql.Text, "commit_hash", true),
67+
expression.NewGetField(1, sql.Text, "blob_hash", true),
68+
)
69+
70+
pool := gitquery.NewRepositoryPool()
71+
for _, f := range fixtures.ByTag("worktree") {
72+
pool.AddGit(f.Worktree().Root())
73+
}
74+
75+
session := gitquery.NewSession(&pool)
76+
ctx := sql.NewContext(context.TODO(), session)
77+
78+
rows := []sql.Row{
79+
// blob is not on commit
80+
sql.NewRow("35e85108805c84807bc66a02d91535e1e24b38b9", "9dea2395f5403188298c1dabe8bdafe562c491e3"),
81+
// blob is on commit
82+
sql.NewRow("6ecf0ef2c2dffb796033e5a02219af86ec6584e5", "9dea2395f5403188298c1dabe8bdafe562c491e3"),
83+
}
84+
85+
b.Run("commit_has_blob", func(b *testing.B) {
86+
require := require.New(b)
87+
88+
for i := 0; i < b.N; i++ {
89+
val, err := f.Eval(ctx, rows[i%2])
90+
require.NoError(err)
91+
if i%2 == 1 {
92+
require.Equal(true, val)
93+
} else {
94+
require.Equal(false, val)
95+
}
96+
}
97+
})
98+
}

internal/function/commit_has_tree.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import (
2020
// or not.
2121
type CommitHasTree struct {
2222
expression.BinaryExpression
23-
cache *lru.ARCCache
23+
cache *lru.TwoQueueCache
2424
}
2525

2626
// TODO: set as config
@@ -30,7 +30,7 @@ const commitHasTreeCacheSize = 100
3030
func NewCommitHasTree(commit, tree sql.Expression) sql.Expression {
3131
// New2Q can only fail if size is not positive, and we know it is positive,
3232
// so it is safe to ignore the error here.
33-
cache, _ := lru.NewARC(commitHasTreeCacheSize)
33+
cache, _ := lru.New2Q(commitHasTreeCacheSize)
3434
return &CommitHasTree{expression.BinaryExpression{
3535
Left: commit,
3636
Right: tree,
@@ -82,7 +82,7 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
8282
commitHash := plumbing.NewHash(left.(string))
8383
treeHash := plumbing.NewHash(right.(string))
8484

85-
if val, ok := f.cache.Get(cacheKey{commitHash, treeHash}); ok {
85+
if val, ok := f.cache.Get(commitTreeKey{commitHash, treeHash}); ok {
8686
return val.(bool), nil
8787
}
8888

@@ -110,7 +110,7 @@ func (f *CommitHasTree) Eval(ctx *sql.Context, row sql.Row) (interface{}, error)
110110
}
111111
}
112112

113-
type cacheKey struct {
113+
type commitTreeKey struct {
114114
commit plumbing.Hash
115115
tree plumbing.Hash
116116
}
@@ -124,7 +124,7 @@ func (f *CommitHasTree) commitHasTree(
124124
return false, err
125125
}
126126

127-
f.cache.Add(cacheKey{commitHash, commit.TreeHash}, true)
127+
f.cache.Add(commitTreeKey{commitHash, commit.TreeHash}, true)
128128

129129
if commit.TreeHash == treeHash {
130130
return true, nil
@@ -151,7 +151,7 @@ func (f *CommitHasTree) treeInEntries(
151151

152152
for {
153153
if len(stack) == 0 {
154-
f.cache.Add(cacheKey{commitHash, hash}, false)
154+
f.cache.Add(commitTreeKey{commitHash, hash}, false)
155155
return false, nil
156156
}
157157

@@ -164,7 +164,7 @@ func (f *CommitHasTree) treeInEntries(
164164
entry := frame.entries[frame.pos]
165165
frame.pos++
166166
if entry.Mode == filemode.Dir {
167-
f.cache.Add(cacheKey{commitHash, entry.Hash}, true)
167+
f.cache.Add(commitTreeKey{commitHash, entry.Hash}, true)
168168
if entry.Hash == hash {
169169
return true, nil
170170
}

0 commit comments

Comments
 (0)